github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/engine/verification/requester/requester.go (about)

     1  package requester
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"time"
     7  
     8  	"github.com/rs/zerolog"
     9  	"go.opentelemetry.io/otel/attribute"
    10  	"golang.org/x/exp/rand"
    11  
    12  	"github.com/onflow/flow-go/engine"
    13  	"github.com/onflow/flow-go/engine/verification/fetcher"
    14  	"github.com/onflow/flow-go/model/flow"
    15  	"github.com/onflow/flow-go/model/messages"
    16  	"github.com/onflow/flow-go/model/verification"
    17  	"github.com/onflow/flow-go/module"
    18  	"github.com/onflow/flow-go/module/mempool"
    19  	"github.com/onflow/flow-go/module/trace"
    20  	"github.com/onflow/flow-go/network"
    21  	"github.com/onflow/flow-go/network/channels"
    22  	"github.com/onflow/flow-go/state/protocol"
    23  	"github.com/onflow/flow-go/utils/logging"
    24  )
    25  
    26  const (
    27  	// DefaultRequestInterval is the time interval that requester engine tries requesting chunk data packs.
    28  	DefaultRequestInterval = 1000 * time.Millisecond
    29  
    30  	// DefaultBackoffMultiplier is the base of exponent in exponential backoff multiplier for backing off requests for chunk data packs.
    31  	DefaultBackoffMultiplier = float64(2)
    32  
    33  	// DefaultBackoffMinInterval is the minimum time interval a chunk data pack request waits before dispatching.
    34  	DefaultBackoffMinInterval = 1000 * time.Millisecond
    35  
    36  	// DefaultBackoffMaxInterval is the maximum time interval a chunk data pack request waits before dispatching.
    37  	DefaultBackoffMaxInterval = 1 * time.Minute
    38  
    39  	// DefaultRequestTargets is the  maximum number of execution nodes a chunk data pack request is dispatched to.
    40  	DefaultRequestTargets = 2
    41  )
    42  
    43  // Engine implements a ChunkDataPackRequester that is responsible of receiving chunk data pack requests,
    44  // dispatching it to the execution nodes, receiving the requested chunk data pack from execution nodes,
    45  // and passing it to the registered handler.
    46  type Engine struct {
    47  	// common
    48  	log   zerolog.Logger
    49  	unit  *engine.Unit
    50  	state protocol.State  // used to check the last sealed height.
    51  	con   network.Conduit // used to send the chunk data request, and receive the response.
    52  
    53  	// monitoring
    54  	tracer  module.Tracer
    55  	metrics module.VerificationMetrics
    56  
    57  	// output interfaces
    58  	handler fetcher.ChunkDataPackHandler // contains callbacks for handling received chunk data packs.
    59  
    60  	// internal logic
    61  	retryInterval    time.Duration                          // determines time in milliseconds for retrying chunk data requests.
    62  	requestTargets   uint64                                 // maximum number of execution nodes being asked for a chunk data pack.
    63  	pendingRequests  mempool.ChunkRequests                  // used to track requested chunks.
    64  	reqQualifierFunc RequestQualifierFunc                   // used to decide whether to dispatch a request at a certain cycle.
    65  	reqUpdaterFunc   mempool.ChunkRequestHistoryUpdaterFunc // used to atomically update chunk request info on mempool.
    66  }
    67  
    68  func New(log zerolog.Logger,
    69  	state protocol.State,
    70  	net network.EngineRegistry,
    71  	tracer module.Tracer,
    72  	metrics module.VerificationMetrics,
    73  	pendingRequests mempool.ChunkRequests,
    74  	retryInterval time.Duration,
    75  	reqQualifierFunc RequestQualifierFunc,
    76  	reqUpdaterFunc mempool.ChunkRequestHistoryUpdaterFunc,
    77  	requestTargets uint64) (*Engine, error) {
    78  
    79  	e := &Engine{
    80  		log:              log.With().Str("engine", "requester").Logger(),
    81  		unit:             engine.NewUnit(),
    82  		state:            state,
    83  		tracer:           tracer,
    84  		metrics:          metrics,
    85  		retryInterval:    retryInterval,
    86  		requestTargets:   requestTargets,
    87  		pendingRequests:  pendingRequests,
    88  		reqUpdaterFunc:   reqUpdaterFunc,
    89  		reqQualifierFunc: reqQualifierFunc,
    90  	}
    91  
    92  	con, err := net.Register(channels.RequestChunks, e)
    93  	if err != nil {
    94  		return nil, fmt.Errorf("could not register chunk data pack provider engine: %w", err)
    95  	}
    96  	e.con = con
    97  
    98  	return e, nil
    99  }
   100  
   101  func (e *Engine) WithChunkDataPackHandler(handler fetcher.ChunkDataPackHandler) {
   102  	e.handler = handler
   103  }
   104  
   105  // SubmitLocal submits an event originating on the local node.
   106  func (e *Engine) SubmitLocal(event interface{}) {
   107  	e.log.Fatal().Msg("engine is not supposed to be invoked on SubmitLocal")
   108  }
   109  
   110  // Submit submits the given event from the node with the given origin ID
   111  // for processing in a non-blocking manner. It returns instantly and logs
   112  // a potential processing error internally when done.
   113  func (e *Engine) Submit(channel channels.Channel, originID flow.Identifier, event interface{}) {
   114  	e.unit.Launch(func() {
   115  		err := e.Process(channel, originID, event)
   116  		if err != nil {
   117  			engine.LogError(e.log, err)
   118  		}
   119  	})
   120  }
   121  
   122  // ProcessLocal processes an event originating on the local node.
   123  func (e *Engine) ProcessLocal(event interface{}) error {
   124  	return fmt.Errorf("should not invoke ProcessLocal of Match engine, use Process instead")
   125  }
   126  
   127  // Process processes the given event from the node with the given origin ID in
   128  // a blocking manner. It returns the potential processing error when done.
   129  func (e *Engine) Process(channel channels.Channel, originID flow.Identifier, event interface{}) error {
   130  	return e.unit.Do(func() error {
   131  		return e.process(originID, event)
   132  	})
   133  }
   134  
   135  // Ready initializes the engine and returns a channel that is closed when the initialization is done.
   136  func (e *Engine) Ready() <-chan struct{} {
   137  	delay := time.Duration(0)
   138  	// run a periodic check to retry requesting chunk data packs.
   139  	// if onTimer takes longer than retryInterval, the next call will be blocked until the previous
   140  	// call has finished.
   141  	// That being said, there won't be two onTimer running in parallel. See test cases for LaunchPeriodically.
   142  	e.unit.LaunchPeriodically(e.onTimer, e.retryInterval, delay)
   143  	return e.unit.Ready()
   144  }
   145  
   146  // Done terminates the engine and returns a channel that is closed when the termination is done
   147  func (e *Engine) Done() <-chan struct{} {
   148  	return e.unit.Done()
   149  }
   150  
   151  // process receives and submits an event to the engine for processing.
   152  // It returns an error so the engine will not propagate an event unless
   153  // it is successfully processed by the engine.
   154  // The origin ID indicates the node which originally submitted the event to
   155  // the peer-to-peer network.
   156  func (e *Engine) process(originID flow.Identifier, event interface{}) error {
   157  	switch resource := event.(type) {
   158  	case *messages.ChunkDataResponse:
   159  		e.handleChunkDataPackWithTracing(originID, &resource.ChunkDataPack)
   160  	default:
   161  		return fmt.Errorf("invalid event type (%T)", event)
   162  	}
   163  
   164  	return nil
   165  }
   166  
   167  // handleChunkDataPackWithTracing encapsulates the logic of handling a chunk data pack with tracing enabled.
   168  func (e *Engine) handleChunkDataPackWithTracing(originID flow.Identifier, chunkDataPack *flow.ChunkDataPack) {
   169  	// TODO: change this to block level as well
   170  	if chunkDataPack.Collection != nil {
   171  		span, _ := e.tracer.StartCollectionSpan(e.unit.Ctx(), chunkDataPack.Collection.ID(), trace.VERRequesterHandleChunkDataResponse)
   172  		span.SetAttributes(attribute.String("chunk_id", chunkDataPack.ChunkID.String()))
   173  		defer span.End()
   174  	}
   175  	e.handleChunkDataPack(originID, chunkDataPack)
   176  }
   177  
   178  // handleChunkDataPack sends the received chunk data pack to the registered handler, and cleans up its request status.
   179  func (e *Engine) handleChunkDataPack(originID flow.Identifier, chunkDataPack *flow.ChunkDataPack) {
   180  	chunkID := chunkDataPack.ChunkID
   181  	lg := e.log.With().
   182  		Hex("chunk_id", logging.ID(chunkID)).
   183  		Logger()
   184  
   185  	if chunkDataPack.Collection != nil {
   186  		collectionID := chunkDataPack.Collection.ID()
   187  		lg = lg.With().Hex("collection_id", logging.ID(collectionID)).Logger()
   188  	}
   189  	lg.Debug().Msg("chunk data pack received")
   190  
   191  	e.metrics.OnChunkDataPackResponseReceivedFromNetworkByRequester()
   192  
   193  	// makes sure we still need this chunk, and we will not process duplicate chunk data packs.
   194  	locators, removed := e.pendingRequests.PopAll(chunkID)
   195  	if !removed {
   196  		lg.Debug().Msg("chunk request status not found in mempool to be removed, dropping chunk")
   197  		return
   198  	}
   199  
   200  	for _, locator := range locators {
   201  		response := verification.ChunkDataPackResponse{
   202  			Locator: *locator,
   203  			Cdp:     chunkDataPack,
   204  		}
   205  
   206  		e.handler.HandleChunkDataPack(originID, &response)
   207  		e.metrics.OnChunkDataPackSentToFetcher()
   208  		lg.Info().
   209  			Hex("result_id", logging.ID(locator.ResultID)).
   210  			Uint64("chunk_index", locator.Index).
   211  			Msg("successfully sent the chunk data pack to the handler")
   212  	}
   213  }
   214  
   215  // Request receives a chunk data pack request and adds it into the pending requests mempool.
   216  func (e *Engine) Request(request *verification.ChunkDataPackRequest) {
   217  	added := e.pendingRequests.Add(request)
   218  
   219  	e.metrics.OnChunkDataPackRequestReceivedByRequester()
   220  
   221  	e.log.Info().
   222  		Hex("chunk_id", logging.ID(request.ChunkID)).
   223  		Uint64("block_height", request.Height).
   224  		Int("agree_executors", len(request.Agrees)).
   225  		Int("disagree_executors", len(request.Disagrees)).
   226  		Bool("added_to_pending_requests", added).
   227  		Msg("chunk data pack request arrived")
   228  }
   229  
   230  // onTimer should run periodically, it goes through all pending requests, and requests their chunk data pack.
   231  // It also retries the chunk data request if the data hasn't been received for a while.
   232  func (e *Engine) onTimer() {
   233  	pendingReqs := e.pendingRequests.All()
   234  
   235  	// keeps maximum attempts made on chunk data packs of the next unsealed height for telemetry
   236  	maxAttempts := uint64(0)
   237  
   238  	e.log.Debug().
   239  		Int("total", len(pendingReqs)).
   240  		Msg("start processing all pending chunk data requests")
   241  
   242  	lastSealed, err := e.state.Sealed().Head()
   243  	if err != nil {
   244  		e.log.Fatal().
   245  			Err(err).
   246  			Msg("could not determine whether block has been sealed")
   247  	}
   248  
   249  	for _, request := range pendingReqs {
   250  		attempts := e.handleChunkDataPackRequestWithTracing(request, lastSealed.Height)
   251  		if attempts > maxAttempts && request.Height == lastSealed.Height+uint64(1) {
   252  			maxAttempts = attempts
   253  		}
   254  	}
   255  
   256  	e.metrics.SetMaxChunkDataPackAttemptsForNextUnsealedHeightAtRequester(maxAttempts)
   257  }
   258  
   259  // handleChunkDataPackRequestWithTracing encapsulates the logic of dispatching chunk data request in network with tracing enabled.
   260  func (e *Engine) handleChunkDataPackRequestWithTracing(request *verification.ChunkDataPackRequestInfo, lastSealedHeight uint64) uint64 {
   261  	// TODO (Ramtin) - enable tracing later
   262  	ctx := e.unit.Ctx()
   263  	return e.handleChunkDataPackRequest(ctx, request, lastSealedHeight)
   264  }
   265  
   266  // handleChunkDataPackRequest encapsulates the logic of dispatching the chunk data pack request to the network.
   267  // The return value determines number of times this request has been dispatched.
   268  func (e *Engine) handleChunkDataPackRequest(ctx context.Context, request *verification.ChunkDataPackRequestInfo, lastSealedHeight uint64) uint64 {
   269  	lg := e.log.With().
   270  		Hex("chunk_id", logging.ID(request.ChunkID)).
   271  		Uint64("block_height", request.Height).
   272  		Logger()
   273  
   274  	// if block has been sealed, then we can finish
   275  	if request.Height <= lastSealedHeight {
   276  		locators, removed := e.pendingRequests.PopAll(request.ChunkID)
   277  
   278  		if !removed {
   279  			lg.Debug().Msg("chunk request status not found in mempool to be removed, drops requesting chunk of a sealed block")
   280  			return 0
   281  		}
   282  
   283  		for _, locator := range locators {
   284  			e.handler.NotifyChunkDataPackSealed(locator.Index, locator.ResultID)
   285  			lg.Info().
   286  				Hex("result_id", logging.ID(locator.ResultID)).
   287  				Uint64("chunk_index", locator.Index).
   288  				Msg("drops requesting chunk of a sealed block")
   289  		}
   290  
   291  		return 0
   292  	}
   293  
   294  	qualified := e.canDispatchRequest(request.ChunkID)
   295  	if !qualified {
   296  		lg.Debug().Msg("chunk data pack request is not qualified for dispatching at this round")
   297  		return 0
   298  	}
   299  
   300  	err := e.requestChunkDataPackWithTracing(ctx, request)
   301  	if err != nil {
   302  		lg.Error().Err(err).Msg("could not request chunk data pack")
   303  		return 0
   304  	}
   305  
   306  	attempts, lastAttempt, retryAfter, updated := e.onRequestDispatched(request.ChunkID)
   307  	lg.Info().
   308  		Bool("pending_request_updated", updated).
   309  		Uint64("attempts_made", attempts).
   310  		Time("last_attempt", lastAttempt).
   311  		Dur("retry_after", retryAfter).
   312  		Msg("chunk data pack requested")
   313  
   314  	return attempts
   315  }
   316  
   317  // requestChunkDataPack dispatches request for the chunk data pack to the execution nodes.
   318  func (e *Engine) requestChunkDataPackWithTracing(ctx context.Context, request *verification.ChunkDataPackRequestInfo) error {
   319  	var err error
   320  	e.tracer.WithSpanFromContext(ctx, trace.VERRequesterDispatchChunkDataRequest, func() {
   321  		err = e.requestChunkDataPack(request)
   322  	})
   323  	return err
   324  }
   325  
   326  // requestChunkDataPack dispatches request for the chunk data pack to the execution nodes.
   327  func (e *Engine) requestChunkDataPack(request *verification.ChunkDataPackRequestInfo) error {
   328  	req := &messages.ChunkDataRequest{
   329  		ChunkID: request.ChunkID,
   330  		Nonce:   rand.Uint64(), // prevent the request from being deduplicated by the receiver
   331  	}
   332  
   333  	// publishes the chunk data request to the network
   334  	targetIDs, err := request.SampleTargets(int(e.requestTargets))
   335  	if err != nil {
   336  		return fmt.Errorf("target sampling failed: %w", err)
   337  	}
   338  	err = e.con.Publish(req, targetIDs...)
   339  	if err != nil {
   340  		return fmt.Errorf("could not publish chunk data pack request for chunk (id=%s): %w", request.ChunkID, err)
   341  	}
   342  
   343  	return nil
   344  }
   345  
   346  // canDispatchRequest returns whether chunk data request for this chunk ID can be dispatched.
   347  func (e *Engine) canDispatchRequest(chunkID flow.Identifier) bool {
   348  	attempts, lastAttempt, retryAfter, exists := e.pendingRequests.RequestHistory(chunkID)
   349  	if !exists {
   350  		return false
   351  	}
   352  
   353  	return e.reqQualifierFunc(attempts, lastAttempt, retryAfter)
   354  }
   355  
   356  // onRequestDispatched encapsulates the logic of updating the chunk data request post a successful dispatch.
   357  func (e *Engine) onRequestDispatched(chunkID flow.Identifier) (uint64, time.Time, time.Duration, bool) {
   358  	e.metrics.OnChunkDataPackRequestDispatchedInNetworkByRequester()
   359  	return e.pendingRequests.UpdateRequestHistory(chunkID, e.reqUpdaterFunc)
   360  }