github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/engine/execution/provider/engine.go (about)

     1  package provider
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"time"
     8  
     9  	"github.com/rs/zerolog"
    10  
    11  	"github.com/onflow/flow-go/module/component"
    12  	"github.com/onflow/flow-go/module/irrecoverable"
    13  	"github.com/onflow/flow-go/module/mempool"
    14  
    15  	"github.com/onflow/flow-go/engine"
    16  	"github.com/onflow/flow-go/engine/execution/state"
    17  	"github.com/onflow/flow-go/model/flow"
    18  	"github.com/onflow/flow-go/model/flow/filter"
    19  	"github.com/onflow/flow-go/model/messages"
    20  	"github.com/onflow/flow-go/module"
    21  	"github.com/onflow/flow-go/module/trace"
    22  	"github.com/onflow/flow-go/network"
    23  	"github.com/onflow/flow-go/network/channels"
    24  	"github.com/onflow/flow-go/state/protocol"
    25  	"github.com/onflow/flow-go/storage"
    26  	"github.com/onflow/flow-go/utils/logging"
    27  	"github.com/onflow/flow-go/utils/rand"
    28  )
    29  
    30  type ProviderEngine interface {
    31  	network.MessageProcessor
    32  	module.ReadyDoneAware
    33  	// BroadcastExecutionReceipt broadcasts an execution receipt to all nodes in the network.
    34  	// It skips broadcasting the receipt if the block is sealed, or the node is not authorized at the block.
    35  	// It returns true if the receipt is broadcasted, false otherwise.
    36  	BroadcastExecutionReceipt(context.Context, uint64, *flow.ExecutionReceipt) (bool, error)
    37  }
    38  
    39  type NoopEngine struct {
    40  	module.NoopReadyDoneAware
    41  }
    42  
    43  func (*NoopEngine) Process(channel channels.Channel, originID flow.Identifier, message interface{}) error {
    44  	return nil
    45  }
    46  
    47  func (*NoopEngine) BroadcastExecutionReceipt(context.Context, uint64, *flow.ExecutionReceipt) (bool, error) {
    48  	return false, nil
    49  }
    50  
    51  const (
    52  	// DefaultChunkDataPackRequestWorker is the default number of concurrent workers processing chunk data pack requests on
    53  	// execution nodes.
    54  	DefaultChunkDataPackRequestWorker = 100
    55  	// DefaultChunkDataPackQueryTimeout is the default timeout value for querying a chunk data pack from storage.
    56  	DefaultChunkDataPackQueryTimeout = 10 * time.Second
    57  	// DefaultChunkDataPackDeliveryTimeout is the default timeout value for delivery of a chunk data pack to a verification
    58  	// node.
    59  	DefaultChunkDataPackDeliveryTimeout = 10 * time.Second
    60  )
    61  
    62  var _ ProviderEngine = (*Engine)(nil)
    63  
    64  // An Engine provides means of accessing data about execution state and broadcasts execution receipts to nodes in the network.
    65  // Also generates and saves execution receipts
    66  type Engine struct {
    67  	component.Component
    68  	cm *component.ComponentManager
    69  
    70  	log                    zerolog.Logger
    71  	tracer                 module.Tracer
    72  	receiptCon             network.Conduit
    73  	state                  protocol.State
    74  	execState              state.ReadOnlyExecutionState
    75  	chunksConduit          network.Conduit
    76  	metrics                module.ExecutionMetrics
    77  	checkAuthorizedAtBlock func(blockID flow.Identifier) (bool, error)
    78  	chdpRequestHandler     *engine.MessageHandler
    79  	chdpRequestQueue       engine.MessageStore
    80  
    81  	// buffered channel for ChunkDataRequest workers to pick
    82  	// requests and process.
    83  	chdpRequestChannel chan *mempool.ChunkDataPackRequest
    84  
    85  	// timeout for delivery of a chunk data pack response in the network.
    86  	chunkDataPackDeliveryTimeout time.Duration
    87  	// timeout for querying chunk data pack through database.
    88  	chunkDataPackQueryTimeout time.Duration
    89  }
    90  
    91  func New(
    92  	logger zerolog.Logger,
    93  	tracer module.Tracer,
    94  	net network.EngineRegistry,
    95  	state protocol.State,
    96  	execState state.ReadOnlyExecutionState,
    97  	metrics module.ExecutionMetrics,
    98  	checkAuthorizedAtBlock func(blockID flow.Identifier) (bool, error),
    99  	chunkDataPackRequestQueue engine.MessageStore,
   100  	chdpRequestWorkers uint,
   101  	chunkDataPackQueryTimeout time.Duration,
   102  	chunkDataPackDeliveryTimeout time.Duration,
   103  ) (*Engine, error) {
   104  
   105  	log := logger.With().Str("engine", "receipts").Logger()
   106  
   107  	handler := engine.NewMessageHandler(
   108  		log,
   109  		engine.NewNotifier(),
   110  		engine.Pattern{
   111  			// Match is called on every new message coming to this engine.
   112  			// Provider enigne only expects ChunkDataRequests.
   113  			// Other message types are discarded by Match.
   114  			Match: func(message *engine.Message) bool {
   115  				chdpReq, ok := message.Payload.(*messages.ChunkDataRequest)
   116  				if ok {
   117  					log.Info().
   118  						Hex("chunk_id", logging.ID(chdpReq.ChunkID)).
   119  						Hex("requester_id", logging.ID(message.OriginID)).
   120  						Msg("chunk data pack request received")
   121  				}
   122  				return ok
   123  			},
   124  			// Map is called on messages that are Match(ed) successfully, i.e.,
   125  			// ChunkDataRequests.
   126  			// It replaces the payload of message with requested chunk id.
   127  			Map: func(message *engine.Message) (*engine.Message, bool) {
   128  				chdpReq := message.Payload.(*messages.ChunkDataRequest)
   129  				return &engine.Message{
   130  					OriginID: message.OriginID,
   131  					Payload:  chdpReq.ChunkID,
   132  				}, true
   133  			},
   134  			Store: chunkDataPackRequestQueue,
   135  		})
   136  
   137  	engine := Engine{
   138  		log:                          log,
   139  		tracer:                       tracer,
   140  		state:                        state,
   141  		execState:                    execState,
   142  		metrics:                      metrics,
   143  		checkAuthorizedAtBlock:       checkAuthorizedAtBlock,
   144  		chdpRequestHandler:           handler,
   145  		chdpRequestQueue:             chunkDataPackRequestQueue,
   146  		chdpRequestChannel:           make(chan *mempool.ChunkDataPackRequest, chdpRequestWorkers),
   147  		chunkDataPackDeliveryTimeout: chunkDataPackDeliveryTimeout,
   148  		chunkDataPackQueryTimeout:    chunkDataPackQueryTimeout,
   149  	}
   150  
   151  	var err error
   152  
   153  	engine.receiptCon, err = net.Register(channels.PushReceipts, &engine)
   154  	if err != nil {
   155  		return nil, fmt.Errorf("could not register receipt provider engine: %w", err)
   156  	}
   157  
   158  	chunksConduit, err := net.Register(channels.ProvideChunks, &engine)
   159  	if err != nil {
   160  		return nil, fmt.Errorf("could not register chunk data pack provider engine: %w", err)
   161  	}
   162  	engine.chunksConduit = chunksConduit
   163  
   164  	cm := component.NewComponentManagerBuilder()
   165  	cm.AddWorker(engine.processQueuedChunkDataPackRequestsShovelerWorker)
   166  	for i := uint(0); i < chdpRequestWorkers; i++ {
   167  		cm.AddWorker(engine.processChunkDataPackRequestWorker)
   168  	}
   169  
   170  	engine.cm = cm.Build()
   171  	engine.Component = engine.cm
   172  
   173  	return &engine, nil
   174  }
   175  
   176  // processQueuedChunkDataPackRequestsShovelerWorker is constantly listening on the MessageHandler for ChunkDataRequests,
   177  // and pushes new ChunkDataRequests into the request channel to be picked by workers.
   178  func (e *Engine) processQueuedChunkDataPackRequestsShovelerWorker(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) {
   179  	ready()
   180  
   181  	e.log.Debug().Msg("process chunk data pack request shovller worker started")
   182  
   183  	for {
   184  		select {
   185  		case <-e.chdpRequestHandler.GetNotifier():
   186  			// there is at list a single chunk data pack request queued up.
   187  			e.processAvailableMesssages(ctx)
   188  		case <-ctx.Done():
   189  			// close the internal channel, the workers will drain the channel before exiting
   190  			close(e.chdpRequestChannel)
   191  			e.log.Trace().Msg("processing chunk data pack request worker terminated")
   192  			return
   193  		}
   194  	}
   195  }
   196  
   197  // processAvailableMesssages is a blocking method that reads all queued ChunkDataRequests till the queue gets empty.
   198  // Each ChunkDataRequest is processed by a single concurrent worker. However, there are limited number of such workers.
   199  // If there is no worker available for a request, the method blocks till one is available.
   200  func (e *Engine) processAvailableMesssages(ctx irrecoverable.SignalerContext) {
   201  	for {
   202  		select {
   203  		case <-ctx.Done():
   204  			return
   205  		default:
   206  		}
   207  
   208  		msg, ok := e.chdpRequestQueue.Get()
   209  		if !ok {
   210  			// no more requests, return
   211  			return
   212  		}
   213  
   214  		chunkId, ok := msg.Payload.(flow.Identifier)
   215  		if !ok {
   216  			// should never happen.
   217  			// if it does happen, it means there is a bug in the queue implementation.
   218  			ctx.Throw(fmt.Errorf("invalid chunk id type in chunk data pack request queue: %T", msg.Payload))
   219  		}
   220  
   221  		request := &mempool.ChunkDataPackRequest{
   222  			RequesterId: msg.OriginID,
   223  			ChunkId:     chunkId,
   224  		}
   225  		lg := e.log.With().
   226  			Hex("chunk_id", logging.ID(request.ChunkId)).
   227  			Hex("origin_id", logging.ID(request.RequesterId)).Logger()
   228  
   229  		lg.Trace().Msg("shovller is queuing chunk data pack request for processing")
   230  		e.chdpRequestChannel <- request
   231  		lg.Trace().Msg("shovller queued up chunk data pack request for processing")
   232  	}
   233  }
   234  
   235  // processChunkDataPackRequestWorker encapsulates the logic of a single (concurrent) worker that picks a
   236  // ChunkDataRequest from this engine's queue and processes it.
   237  func (e *Engine) processChunkDataPackRequestWorker(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) {
   238  	ready()
   239  
   240  	for {
   241  		request, ok := <-e.chdpRequestChannel
   242  		if !ok {
   243  			e.log.Trace().Msg("processing chunk data pack request worker terminated")
   244  			return
   245  		}
   246  		lg := e.log.With().
   247  			Hex("chunk_id", logging.ID(request.ChunkId)).
   248  			Hex("origin_id", logging.ID(request.RequesterId)).Logger()
   249  		lg.Trace().Msg("worker picked up chunk data pack request for processing")
   250  		e.onChunkDataRequest(request)
   251  		lg.Trace().Msg("worker finished chunk data pack processing")
   252  	}
   253  }
   254  
   255  func (e *Engine) Process(channel channels.Channel, originID flow.Identifier, event interface{}) error {
   256  	select {
   257  	case <-e.cm.ShutdownSignal():
   258  		e.log.Warn().Msgf("received message from %x after shut down", originID)
   259  		return nil
   260  	default:
   261  	}
   262  
   263  	err := e.chdpRequestHandler.Process(originID, event)
   264  	if err != nil {
   265  		if engine.IsIncompatibleInputTypeError(err) {
   266  			e.log.Warn().Msgf("%v delivered unsupported message %T through %v", originID, event, channel)
   267  			return nil
   268  		}
   269  		return fmt.Errorf("unexpected error while processing engine message: %w", err)
   270  	}
   271  
   272  	return nil
   273  }
   274  
   275  // onChunkDataRequest receives a request for a chunk data pack,
   276  // and if such a chunk data pack is available in the execution state, it is sent to the requester node.
   277  // TODO improve error handling https://github.com/dapperlabs/flow-go/issues/6363
   278  func (e *Engine) onChunkDataRequest(request *mempool.ChunkDataPackRequest) {
   279  	processStartTime := time.Now()
   280  
   281  	lg := e.log.With().
   282  		Hex("origin_id", logging.ID(request.RequesterId)).
   283  		Hex("chunk_id", logging.ID(request.ChunkId)).
   284  		Logger()
   285  	lg.Info().Msg("started processing chunk data pack request")
   286  
   287  	// TODO(ramtin): we might add a future logic to do extra checks on the origin of the request
   288  	// currently the networking layer checks that the requested is a valid node operator
   289  	// that has not been ejected.
   290  
   291  	// increases collector metric
   292  	e.metrics.ChunkDataPackRequestProcessed()
   293  	chunkDataPack, err := e.execState.ChunkDataPackByChunkID(request.ChunkId)
   294  
   295  	// we might be behind when we don't have the requested chunk.
   296  	// if this happen, log it and return nil
   297  	if errors.Is(err, storage.ErrNotFound) {
   298  		lg.Warn().
   299  			Err(err).
   300  			Msg("chunk data pack not found, execution node may be behind")
   301  		return
   302  	}
   303  	if err != nil {
   304  		lg.Error().
   305  			Err(err).
   306  			Msg("could not retrieve chunk ID from storage")
   307  		return
   308  	}
   309  
   310  	queryTime := time.Since(processStartTime)
   311  	lg = lg.With().Dur("query_time", queryTime).Logger()
   312  	if queryTime > e.chunkDataPackQueryTimeout {
   313  		lg.Warn().
   314  			Dur("query_timout", e.chunkDataPackQueryTimeout).
   315  			Msg("chunk data pack query takes longer than expected timeout")
   316  	}
   317  
   318  	e.deliverChunkDataResponse(chunkDataPack, request.RequesterId)
   319  }
   320  
   321  // deliverChunkDataResponse delivers chunk data pack to the requester through network.
   322  func (e *Engine) deliverChunkDataResponse(chunkDataPack *flow.ChunkDataPack, requesterId flow.Identifier) {
   323  	lg := e.log.With().
   324  		Hex("origin_id", logging.ID(requesterId)).
   325  		Hex("chunk_id", logging.ID(chunkDataPack.ChunkID)).
   326  		Logger()
   327  	lg.Info().Msg("sending chunk data pack response")
   328  
   329  	// sends requested chunk data pack to the requester
   330  	deliveryStartTime := time.Now()
   331  
   332  	nonce, err := rand.Uint64()
   333  	if err != nil {
   334  		// TODO: this error should be returned by deliverChunkDataResponse
   335  		// it is logged for now since the only error possible is related to a failure
   336  		// of the system entropy generation. Such error is going to cause failures in other
   337  		// components where it's handled properly and will lead to crashing the module.
   338  		lg.Error().
   339  			Err(err).
   340  			Msg("could not generate nonce for chunk data response")
   341  		return
   342  	}
   343  
   344  	response := &messages.ChunkDataResponse{
   345  		ChunkDataPack: *chunkDataPack,
   346  		Nonce:         nonce,
   347  	}
   348  
   349  	err = e.chunksConduit.Unicast(response, requesterId)
   350  	if err != nil {
   351  		lg.Warn().
   352  			Err(err).
   353  			Msg("could not send requested chunk data pack to requester")
   354  		return
   355  	}
   356  
   357  	deliveryTime := time.Since(deliveryStartTime)
   358  	lg = lg.With().Dur("delivery_time", deliveryTime).Logger()
   359  	if deliveryTime > e.chunkDataPackDeliveryTimeout {
   360  		lg.Warn().
   361  			Dur("delivery_timout", e.chunkDataPackDeliveryTimeout).
   362  			Msg("chunk data pack delivery takes longer than expected timeout")
   363  	}
   364  
   365  	if chunkDataPack.Collection != nil {
   366  		// logging collection id of non-system chunks.
   367  		// A system chunk has both the collection and collection id set to nil.
   368  		lg = lg.With().
   369  			Hex("collection_id", logging.ID(chunkDataPack.Collection.ID())).
   370  			Logger()
   371  	}
   372  	lg.Info().Msg("chunk data pack request successfully replied")
   373  }
   374  
   375  // BroadcastExecutionReceipt broadcasts an execution receipt to all nodes in the network.
   376  // It skips broadcasting the receipt if the block is sealed, or the node is not authorized at the block.
   377  // It returns true if the receipt is broadcasted, false otherwise.
   378  func (e *Engine) BroadcastExecutionReceipt(ctx context.Context, height uint64, receipt *flow.ExecutionReceipt) (bool, error) {
   379  	// if the receipt is for a sealed block, then no need to broadcast it.
   380  	lastSealed, err := e.state.Sealed().Head()
   381  	if err != nil {
   382  		return false, fmt.Errorf("could not get sealed block before broadcasting: %w", err)
   383  	}
   384  
   385  	isExecutedBlockSealed := height <= lastSealed.Height
   386  
   387  	if isExecutedBlockSealed {
   388  		// no need to braodcast the receipt if the block is sealed
   389  		return false, nil
   390  	}
   391  
   392  	blockID := receipt.ExecutionResult.BlockID
   393  	authorizedAtBlock, err := e.checkAuthorizedAtBlock(blockID)
   394  	if err != nil {
   395  		return false, fmt.Errorf("could not check staking status: %w", err)
   396  	}
   397  
   398  	if !authorizedAtBlock {
   399  		return false, nil
   400  	}
   401  
   402  	finalState, err := receipt.ExecutionResult.FinalStateCommitment()
   403  	if err != nil {
   404  		return false, fmt.Errorf("could not get final state: %w", err)
   405  	}
   406  
   407  	span, _ := e.tracer.StartSpanFromContext(ctx, trace.EXEBroadcastExecutionReceipt)
   408  	defer span.End()
   409  
   410  	e.log.Debug().
   411  		Hex("block_id", logging.ID(receipt.ExecutionResult.BlockID)).
   412  		Hex("receipt_id", logging.Entity(receipt)).
   413  		Hex("final_state", finalState[:]).
   414  		Msg("broadcasting execution receipt")
   415  
   416  	identities, err := e.state.Final().Identities(filter.HasRole[flow.Identity](flow.RoleAccess, flow.RoleConsensus,
   417  		flow.RoleVerification))
   418  	if err != nil {
   419  		return false, fmt.Errorf("could not get consensus and verification identities: %w", err)
   420  	}
   421  
   422  	err = e.receiptCon.Publish(receipt, identities.NodeIDs()...)
   423  	if err != nil {
   424  		return false, fmt.Errorf("could not submit execution receipts: %w", err)
   425  	}
   426  
   427  	return true, nil
   428  }