github.com/koko1123/flow-go-1@v0.29.6/engine/execution/provider/engine.go (about)

     1  package provider
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"math/rand"
     8  	"time"
     9  
    10  	"github.com/rs/zerolog"
    11  
    12  	"github.com/koko1123/flow-go-1/module/component"
    13  	"github.com/koko1123/flow-go-1/module/irrecoverable"
    14  	"github.com/koko1123/flow-go-1/module/mempool"
    15  
    16  	"github.com/koko1123/flow-go-1/engine"
    17  	"github.com/koko1123/flow-go-1/engine/execution/state"
    18  	"github.com/koko1123/flow-go-1/model/flow"
    19  	"github.com/koko1123/flow-go-1/model/flow/filter"
    20  	"github.com/koko1123/flow-go-1/model/messages"
    21  	"github.com/koko1123/flow-go-1/module"
    22  	"github.com/koko1123/flow-go-1/module/trace"
    23  	"github.com/koko1123/flow-go-1/network"
    24  	"github.com/koko1123/flow-go-1/network/channels"
    25  	"github.com/koko1123/flow-go-1/state/protocol"
    26  	"github.com/koko1123/flow-go-1/storage"
    27  	"github.com/koko1123/flow-go-1/utils/logging"
    28  )
    29  
    30  type ProviderEngine interface {
    31  	network.MessageProcessor
    32  	BroadcastExecutionReceipt(context.Context, *flow.ExecutionReceipt) error
    33  }
    34  
    35  const (
    36  	// DefaultChunkDataPackRequestWorker is the default number of concurrent workers processing chunk data pack requests on
    37  	// execution nodes.
    38  	DefaultChunkDataPackRequestWorker = 100
    39  	// DefaultChunkDataPackQueryTimeout is the default timeout value for querying a chunk data pack from storage.
    40  	DefaultChunkDataPackQueryTimeout = 10 * time.Second
    41  	// DefaultChunkDataPackDeliveryTimeout is the default timeout value for delivery of a chunk data pack to a verification
    42  	// node.
    43  	DefaultChunkDataPackDeliveryTimeout = 10 * time.Second
    44  )
    45  
    46  // An Engine provides means of accessing data about execution state and broadcasts execution receipts to nodes in the network.
    47  // Also generates and saves execution receipts
    48  type Engine struct {
    49  	component.Component
    50  	cm *component.ComponentManager
    51  
    52  	log                    zerolog.Logger
    53  	tracer                 module.Tracer
    54  	receiptCon             network.Conduit
    55  	state                  protocol.State
    56  	execState              state.ReadOnlyExecutionState
    57  	chunksConduit          network.Conduit
    58  	metrics                module.ExecutionMetrics
    59  	checkAuthorizedAtBlock func(blockID flow.Identifier) (bool, error)
    60  	chdpRequestHandler     *engine.MessageHandler
    61  	chdpRequestQueue       engine.MessageStore
    62  
    63  	// buffered channel for ChunkDataRequest workers to pick
    64  	// requests and process.
    65  	chdpRequestChannel chan *mempool.ChunkDataPackRequest
    66  
    67  	// timeout for delivery of a chunk data pack response in the network.
    68  	chunkDataPackDeliveryTimeout time.Duration
    69  	// timeout for querying chunk data pack through database.
    70  	chunkDataPackQueryTimeout time.Duration
    71  }
    72  
    73  func New(
    74  	logger zerolog.Logger,
    75  	tracer module.Tracer,
    76  	net network.Network,
    77  	state protocol.State,
    78  	execState state.ReadOnlyExecutionState,
    79  	metrics module.ExecutionMetrics,
    80  	checkAuthorizedAtBlock func(blockID flow.Identifier) (bool, error),
    81  	chunkDataPackRequestQueue engine.MessageStore,
    82  	chdpRequestWorkers uint,
    83  	chunkDataPackQueryTimeout time.Duration,
    84  	chunkDataPackDeliveryTimeout time.Duration,
    85  ) (*Engine, error) {
    86  
    87  	log := logger.With().Str("engine", "receipts").Logger()
    88  
    89  	handler := engine.NewMessageHandler(
    90  		log,
    91  		engine.NewNotifier(),
    92  		engine.Pattern{
    93  			// Match is called on every new message coming to this engine.
    94  			// Provider enigne only expects ChunkDataRequests.
    95  			// Other message types are discarded by Match.
    96  			Match: func(message *engine.Message) bool {
    97  				chdpReq, ok := message.Payload.(*messages.ChunkDataRequest)
    98  				if ok {
    99  					log.Info().
   100  						Hex("chunk_id", logging.ID(chdpReq.ChunkID)).
   101  						Hex("requester_id", logging.ID(message.OriginID)).
   102  						Msg("chunk data pack request received")
   103  				}
   104  				return ok
   105  			},
   106  			// Map is called on messages that are Match(ed) successfully, i.e.,
   107  			// ChunkDataRequests.
   108  			// It replaces the payload of message with requested chunk id.
   109  			Map: func(message *engine.Message) (*engine.Message, bool) {
   110  				chdpReq := message.Payload.(*messages.ChunkDataRequest)
   111  				return &engine.Message{
   112  					OriginID: message.OriginID,
   113  					Payload:  chdpReq.ChunkID,
   114  				}, true
   115  			},
   116  			Store: chunkDataPackRequestQueue,
   117  		})
   118  
   119  	engine := Engine{
   120  		log:                          log,
   121  		tracer:                       tracer,
   122  		state:                        state,
   123  		execState:                    execState,
   124  		metrics:                      metrics,
   125  		checkAuthorizedAtBlock:       checkAuthorizedAtBlock,
   126  		chdpRequestHandler:           handler,
   127  		chdpRequestQueue:             chunkDataPackRequestQueue,
   128  		chdpRequestChannel:           make(chan *mempool.ChunkDataPackRequest, chdpRequestWorkers),
   129  		chunkDataPackDeliveryTimeout: chunkDataPackDeliveryTimeout,
   130  		chunkDataPackQueryTimeout:    chunkDataPackQueryTimeout,
   131  	}
   132  
   133  	var err error
   134  
   135  	engine.receiptCon, err = net.Register(channels.PushReceipts, &engine)
   136  	if err != nil {
   137  		return nil, fmt.Errorf("could not register receipt provider engine: %w", err)
   138  	}
   139  
   140  	chunksConduit, err := net.Register(channels.ProvideChunks, &engine)
   141  	if err != nil {
   142  		return nil, fmt.Errorf("could not register chunk data pack provider engine: %w", err)
   143  	}
   144  	engine.chunksConduit = chunksConduit
   145  
   146  	cm := component.NewComponentManagerBuilder()
   147  	cm.AddWorker(engine.processQueuedChunkDataPackRequestsShovelerWorker)
   148  	for i := uint(0); i < chdpRequestWorkers; i++ {
   149  		cm.AddWorker(engine.processChunkDataPackRequestWorker)
   150  	}
   151  
   152  	engine.cm = cm.Build()
   153  	engine.Component = engine.cm
   154  
   155  	return &engine, nil
   156  }
   157  
   158  // processQueuedChunkDataPackRequestsShovelerWorker is constantly listening on the MessageHandler for ChunkDataRequests,
   159  // and pushes new ChunkDataRequests into the request channel to be picked by workers.
   160  func (e *Engine) processQueuedChunkDataPackRequestsShovelerWorker(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) {
   161  	ready()
   162  
   163  	e.log.Debug().Msg("process chunk data pack request shovller worker started")
   164  
   165  	for {
   166  		select {
   167  		case <-e.chdpRequestHandler.GetNotifier():
   168  			// there is at list a single chunk data pack request queued up.
   169  			e.processAvailableMesssages(ctx)
   170  		case <-ctx.Done():
   171  			// close the internal channel, the workers will drain the channel before exiting
   172  			close(e.chdpRequestChannel)
   173  			e.log.Trace().Msg("processing chunk data pack request worker terminated")
   174  			return
   175  		}
   176  	}
   177  }
   178  
   179  // processAvailableMesssages is a blocking method that reads all queued ChunkDataRequests till the queue gets empty.
   180  // Each ChunkDataRequest is processed by a single concurrent worker. However, there are limited number of such workers.
   181  // If there is no worker available for a request, the method blocks till one is available.
   182  func (e *Engine) processAvailableMesssages(ctx irrecoverable.SignalerContext) {
   183  	for {
   184  		select {
   185  		case <-ctx.Done():
   186  			return
   187  		default:
   188  		}
   189  
   190  		msg, ok := e.chdpRequestQueue.Get()
   191  		if !ok {
   192  			// no more requests, return
   193  			return
   194  		}
   195  
   196  		chunkId, ok := msg.Payload.(flow.Identifier)
   197  		if !ok {
   198  			// should never happen.
   199  			// if it does happen, it means there is a bug in the queue implementation.
   200  			ctx.Throw(fmt.Errorf("invalid chunk id type in chunk data pack request queue: %T", msg.Payload))
   201  		}
   202  
   203  		request := &mempool.ChunkDataPackRequest{
   204  			RequesterId: msg.OriginID,
   205  			ChunkId:     chunkId,
   206  		}
   207  		lg := e.log.With().
   208  			Hex("chunk_id", logging.ID(request.ChunkId)).
   209  			Hex("origin_id", logging.ID(request.RequesterId)).Logger()
   210  
   211  		lg.Trace().Msg("shovller is queuing chunk data pack request for processing")
   212  		e.chdpRequestChannel <- request
   213  		lg.Trace().Msg("shovller queued up chunk data pack request for processing")
   214  	}
   215  }
   216  
   217  // processChunkDataPackRequestWorker encapsulates the logic of a single (concurrent) worker that picks a
   218  // ChunkDataRequest from this engine's queue and processes it.
   219  func (e *Engine) processChunkDataPackRequestWorker(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) {
   220  	ready()
   221  
   222  	for {
   223  		request, ok := <-e.chdpRequestChannel
   224  		if !ok {
   225  			e.log.Trace().Msg("processing chunk data pack request worker terminated")
   226  			return
   227  		}
   228  		lg := e.log.With().
   229  			Hex("chunk_id", logging.ID(request.ChunkId)).
   230  			Hex("origin_id", logging.ID(request.RequesterId)).Logger()
   231  		lg.Trace().Msg("worker picked up chunk data pack request for processing")
   232  		e.onChunkDataRequest(request)
   233  		lg.Trace().Msg("worker finished chunk data pack processing")
   234  	}
   235  }
   236  
   237  func (e *Engine) Process(channel channels.Channel, originID flow.Identifier, event interface{}) error {
   238  	select {
   239  	case <-e.cm.ShutdownSignal():
   240  		e.log.Warn().Msgf("received message from %x after shut down", originID)
   241  		return nil
   242  	default:
   243  	}
   244  
   245  	err := e.chdpRequestHandler.Process(originID, event)
   246  	if err != nil {
   247  		if engine.IsIncompatibleInputTypeError(err) {
   248  			e.log.Warn().Msgf("%v delivered unsupported message %T through %v", originID, event, channel)
   249  			return nil
   250  		}
   251  		return fmt.Errorf("unexpected error while processing engine message: %w", err)
   252  	}
   253  
   254  	return nil
   255  }
   256  
   257  // onChunkDataRequest receives a request for a chunk data pack,
   258  // and if such a chunk data pack is available in the execution state, it is sent to the requester node.
   259  // TODO improve error handling https://github.com/dapperlabs/flow-go/issues/6363
   260  func (e *Engine) onChunkDataRequest(request *mempool.ChunkDataPackRequest) {
   261  	processStartTime := time.Now()
   262  
   263  	lg := e.log.With().
   264  		Hex("origin_id", logging.ID(request.RequesterId)).
   265  		Hex("chunk_id", logging.ID(request.ChunkId)).
   266  		Logger()
   267  	lg.Info().Msg("started processing chunk data pack request")
   268  
   269  	// increases collector metric
   270  	e.metrics.ChunkDataPackRequestProcessed()
   271  	chunkDataPack, err := e.execState.ChunkDataPackByChunkID(request.ChunkId)
   272  
   273  	// we might be behind when we don't have the requested chunk.
   274  	// if this happen, log it and return nil
   275  	if errors.Is(err, storage.ErrNotFound) {
   276  		lg.Warn().
   277  			Err(err).
   278  			Msg("chunk data pack not found, execution node may be behind")
   279  		return
   280  	}
   281  	if err != nil {
   282  		lg.Error().
   283  			Err(err).
   284  			Msg("could not retrieve chunk ID from storage")
   285  		return
   286  	}
   287  
   288  	queryTime := time.Since(processStartTime)
   289  	lg = lg.With().Dur("query_time", queryTime).Logger()
   290  	if queryTime > e.chunkDataPackQueryTimeout {
   291  		lg.Warn().
   292  			Dur("query_timout", e.chunkDataPackQueryTimeout).
   293  			Msg("chunk data pack query takes longer than expected timeout")
   294  	}
   295  
   296  	_, err = e.ensureAuthorized(chunkDataPack.ChunkID, request.RequesterId)
   297  	if err != nil {
   298  		lg.Error().
   299  			Err(err).
   300  			Msg("could not verify authorization of identity of chunk data pack request")
   301  		return
   302  	}
   303  
   304  	e.deliverChunkDataResponse(chunkDataPack, request.RequesterId)
   305  }
   306  
   307  // deliverChunkDataResponse delivers chunk data pack to the requester through network.
   308  func (e *Engine) deliverChunkDataResponse(chunkDataPack *flow.ChunkDataPack, requesterId flow.Identifier) {
   309  	lg := e.log.With().
   310  		Hex("origin_id", logging.ID(requesterId)).
   311  		Hex("chunk_id", logging.ID(chunkDataPack.ChunkID)).
   312  		Logger()
   313  	lg.Info().Msg("sending chunk data pack response")
   314  
   315  	// sends requested chunk data pack to the requester
   316  	deliveryStartTime := time.Now()
   317  
   318  	response := &messages.ChunkDataResponse{
   319  		ChunkDataPack: *chunkDataPack,
   320  		Nonce:         rand.Uint64(),
   321  	}
   322  
   323  	err := e.chunksConduit.Unicast(response, requesterId)
   324  	if err != nil {
   325  		lg.Warn().
   326  			Err(err).
   327  			Msg("could not send requested chunk data pack to requester")
   328  		return
   329  	}
   330  
   331  	deliveryTime := time.Since(deliveryStartTime)
   332  	lg = lg.With().Dur("delivery_time", deliveryTime).Logger()
   333  	if deliveryTime > e.chunkDataPackDeliveryTimeout {
   334  		lg.Warn().
   335  			Dur("delivery_timout", e.chunkDataPackDeliveryTimeout).
   336  			Msg("chunk data pack delivery takes longer than expected timeout")
   337  	}
   338  
   339  	if chunkDataPack.Collection != nil {
   340  		// logging collection id of non-system chunks.
   341  		// A system chunk has both the collection and collection id set to nil.
   342  		lg = lg.With().
   343  			Hex("collection_id", logging.ID(chunkDataPack.Collection.ID())).
   344  			Logger()
   345  	}
   346  	lg.Info().Msg("chunk data pack request successfully replied")
   347  }
   348  
   349  func (e *Engine) ensureAuthorized(chunkID flow.Identifier, originID flow.Identifier) (*flow.Identity, error) {
   350  	blockID, err := e.execState.GetBlockIDByChunkID(chunkID)
   351  	if err != nil {
   352  		return nil, engine.NewInvalidInputErrorf("cannot find blockID corresponding to chunk data pack: %w", err)
   353  	}
   354  
   355  	authorizedAt, err := e.checkAuthorizedAtBlock(blockID)
   356  	if err != nil {
   357  		return nil, engine.NewInvalidInputErrorf("cannot check block staking status: %w", err)
   358  	}
   359  	if !authorizedAt {
   360  		return nil, engine.NewInvalidInputErrorf("this node is not authorized at the block (%s) corresponding to chunk data pack (%s)", blockID.String(), chunkID.String())
   361  	}
   362  
   363  	origin, err := e.state.AtBlockID(blockID).Identity(originID)
   364  	if err != nil {
   365  		return nil, engine.NewInvalidInputErrorf("invalid origin id (%s): %w", origin, err)
   366  	}
   367  
   368  	// only verifier nodes are allowed to request chunk data packs
   369  	if origin.Role != flow.RoleVerification {
   370  		return nil, engine.NewInvalidInputErrorf("invalid role for receiving collection: %s", origin.Role)
   371  	}
   372  
   373  	if origin.Weight == 0 {
   374  		return nil, engine.NewInvalidInputErrorf("node %s has zero weight at the block (%s) corresponding to chunk data pack (%s)", originID, blockID.String(), chunkID.String())
   375  	}
   376  	return origin, nil
   377  }
   378  
   379  func (e *Engine) BroadcastExecutionReceipt(ctx context.Context, receipt *flow.ExecutionReceipt) error {
   380  	finalState, err := receipt.ExecutionResult.FinalStateCommitment()
   381  	if err != nil {
   382  		return fmt.Errorf("could not get final state: %w", err)
   383  	}
   384  
   385  	span, _ := e.tracer.StartSpanFromContext(ctx, trace.EXEBroadcastExecutionReceipt)
   386  	defer span.End()
   387  
   388  	e.log.Debug().
   389  		Hex("block_id", logging.ID(receipt.ExecutionResult.BlockID)).
   390  		Hex("receipt_id", logging.Entity(receipt)).
   391  		Hex("final_state", finalState[:]).
   392  		Msg("broadcasting execution receipt")
   393  
   394  	identities, err := e.state.Final().Identities(filter.HasRole(flow.RoleAccess, flow.RoleConsensus,
   395  		flow.RoleVerification))
   396  	if err != nil {
   397  		return fmt.Errorf("could not get consensus and verification identities: %w", err)
   398  	}
   399  
   400  	err = e.receiptCon.Publish(receipt, identities.NodeIDs()...)
   401  	if err != nil {
   402  		return fmt.Errorf("could not submit execution receipts: %w", err)
   403  	}
   404  
   405  	return nil
   406  }