github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/engine/common/synchronization/engine.go

github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/engine/common/synchronization/engine.go (about)

     1  package synchronization
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"time"
     7  
     8  	"github.com/hashicorp/go-multierror"
     9  	"github.com/rs/zerolog"
    10  
    11  	"github.com/onflow/flow-go/consensus/hotstuff"
    12  	"github.com/onflow/flow-go/engine"
    13  	"github.com/onflow/flow-go/engine/common/fifoqueue"
    14  	"github.com/onflow/flow-go/engine/consensus"
    15  	"github.com/onflow/flow-go/model/chainsync"
    16  	"github.com/onflow/flow-go/model/flow"
    17  	"github.com/onflow/flow-go/model/messages"
    18  	"github.com/onflow/flow-go/module"
    19  	synccore "github.com/onflow/flow-go/module/chainsync"
    20  	"github.com/onflow/flow-go/module/component"
    21  	"github.com/onflow/flow-go/module/events"
    22  	"github.com/onflow/flow-go/module/irrecoverable"
    23  	"github.com/onflow/flow-go/module/metrics"
    24  	"github.com/onflow/flow-go/network"
    25  	"github.com/onflow/flow-go/network/alsp"
    26  	"github.com/onflow/flow-go/network/channels"
    27  	"github.com/onflow/flow-go/state/protocol"
    28  	"github.com/onflow/flow-go/storage"
    29  	"github.com/onflow/flow-go/utils/logging"
    30  	"github.com/onflow/flow-go/utils/rand"
    31  )
    32  
    33  // defaultSyncResponseQueueCapacity maximum capacity of sync responses queue
    34  const defaultSyncResponseQueueCapacity = 500
    35  
    36  // defaultBlockResponseQueueCapacity maximum capacity of block responses queue
    37  const defaultBlockResponseQueueCapacity = 500
    38  
// Engine is the synchronization engine, responsible for synchronizing chain state.
//
// It sends sync, range and batch requests to other participants (see checkLoop,
// pollHeight and sendRequests), hands inbound requests to the request handler,
// and queues inbound responses for asynchronous processing.
type Engine struct {
	// Component provides the lifecycle of the engine; it is built in New from the
	// engine's worker routines.
	component.Component
	// FinalizationConsumer is set to the finalized header cache in New.
	hotstuff.FinalizationConsumer

	log                  zerolog.Logger
	metrics              module.EngineMetrics
	me                   module.Local
	finalizedHeaderCache module.FinalizedHeaderCache
	con                  network.Conduit
	blocks               storage.Blocks
	comp                 consensus.Compliance

	// pollInterval drives height polling in checkLoop (polling is disabled when it
	// is not positive); scanInterval drives the scan for pending ranges and batches.
	pollInterval         time.Duration
	scanInterval         time.Duration
	core                 module.SyncCore
	participantsProvider module.IdentifierProvider

	requestHandler      *RequestHandler // component responsible for handling requests
	spamDetectionConfig *SpamDetectionConfig

	pendingSyncResponses   engine.MessageStore    // message store for *message.SyncResponse
	pendingBlockResponses  engine.MessageStore    // message store for *message.BlockResponse
	responseMessageHandler *engine.MessageHandler // message handler responsible for response processing
}
    64  
    65  var _ network.MessageProcessor = (*Engine)(nil)
    66  var _ component.Component = (*Engine)(nil)
    67  
    68  // New creates a new main chain synchronization engine.
    69  func New(
    70  	log zerolog.Logger,
    71  	metrics module.EngineMetrics,
    72  	net network.EngineRegistry,
    73  	me module.Local,
    74  	state protocol.State,
    75  	blocks storage.Blocks,
    76  	comp consensus.Compliance,
    77  	core module.SyncCore,
    78  	participantsProvider module.IdentifierProvider,
    79  	spamDetectionConfig *SpamDetectionConfig,
    80  	opts ...OptionFunc,
    81  ) (*Engine, error) {
    82  
    83  	opt := DefaultConfig()
    84  	for _, f := range opts {
    85  		f(opt)
    86  	}
    87  
    88  	if comp == nil {
    89  		panic("must initialize synchronization engine with comp engine")
    90  	}
    91  
    92  	finalizedHeaderCache, finalizedCacheWorker, err := events.NewFinalizedHeaderCache(state)
    93  	if err != nil {
    94  		return nil, fmt.Errorf("could not create finalized header cache: %w", err)
    95  	}
    96  
    97  	// initialize the propagation engine with its dependencies
    98  	e := &Engine{
    99  		FinalizationConsumer: finalizedHeaderCache,
   100  		log:                  log.With().Str("engine", "synchronization").Logger(),
   101  		metrics:              metrics,
   102  		me:                   me,
   103  		finalizedHeaderCache: finalizedHeaderCache,
   104  		blocks:               blocks,
   105  		comp:                 comp,
   106  		core:                 core,
   107  		pollInterval:         opt.PollInterval,
   108  		scanInterval:         opt.ScanInterval,
   109  		participantsProvider: participantsProvider,
   110  		spamDetectionConfig:  spamDetectionConfig,
   111  	}
   112  
   113  	// register the engine with the network layer and store the conduit
   114  	con, err := net.Register(channels.SyncCommittee, e)
   115  	if err != nil {
   116  		return nil, fmt.Errorf("could not register engine: %w", err)
   117  	}
   118  	e.con = con
   119  	e.requestHandler = NewRequestHandler(log, metrics, NewResponseSender(con), me, finalizedHeaderCache, blocks, core, true)
   120  
   121  	// set up worker routines
   122  	builder := component.NewComponentManagerBuilder().
   123  		AddWorker(finalizedCacheWorker).
   124  		AddWorker(e.checkLoop).
   125  		AddWorker(e.responseProcessingLoop)
   126  	for i := 0; i < defaultEngineRequestsWorkers; i++ {
   127  		builder.AddWorker(e.requestHandler.requestProcessingWorker)
   128  	}
   129  	e.Component = builder.Build()
   130  
   131  	err = e.setupResponseMessageHandler()
   132  	if err != nil {
   133  		return nil, fmt.Errorf("could not setup message handler")
   134  	}
   135  
   136  	return e, nil
   137  }
   138  
   139  // setupResponseMessageHandler initializes the inbound queues and the MessageHandler for UNTRUSTED responses.
   140  func (e *Engine) setupResponseMessageHandler() error {
   141  	syncResponseQueue, err := fifoqueue.NewFifoQueue(defaultSyncResponseQueueCapacity)
   142  	if err != nil {
   143  		return fmt.Errorf("failed to create queue for sync responses: %w", err)
   144  	}
   145  
   146  	e.pendingSyncResponses = &engine.FifoMessageStore{
   147  		FifoQueue: syncResponseQueue,
   148  	}
   149  
   150  	blockResponseQueue, err := fifoqueue.NewFifoQueue(defaultBlockResponseQueueCapacity)
   151  	if err != nil {
   152  		return fmt.Errorf("failed to create queue for block responses: %w", err)
   153  	}
   154  
   155  	e.pendingBlockResponses = &engine.FifoMessageStore{
   156  		FifoQueue: blockResponseQueue,
   157  	}
   158  
   159  	// define message queueing behaviour
   160  	e.responseMessageHandler = engine.NewMessageHandler(
   161  		e.log,
   162  		engine.NewNotifier(),
   163  		engine.Pattern{
   164  			Match: func(msg *engine.Message) bool {
   165  				_, ok := msg.Payload.(*messages.SyncResponse)
   166  				if ok {
   167  					e.metrics.MessageReceived(metrics.EngineSynchronization, metrics.MessageSyncResponse)
   168  				}
   169  				return ok
   170  			},
   171  			Store: e.pendingSyncResponses,
   172  		},
   173  		engine.Pattern{
   174  			Match: func(msg *engine.Message) bool {
   175  				_, ok := msg.Payload.(*messages.BlockResponse)
   176  				if ok {
   177  					e.metrics.MessageReceived(metrics.EngineSynchronization, metrics.MessageBlockResponse)
   178  				}
   179  				return ok
   180  			},
   181  			Store: e.pendingBlockResponses,
   182  		},
   183  	)
   184  
   185  	return nil
   186  }
   187  
   188  // Process processes the given event from the node with the given origin ID in
   189  // a blocking manner. It returns the potential processing error when done.
   190  func (e *Engine) Process(channel channels.Channel, originID flow.Identifier, event interface{}) error {
   191  	err := e.process(channel, originID, event)
   192  	if err != nil {
   193  		if engine.IsIncompatibleInputTypeError(err) {
   194  			e.log.Warn().Msgf("%v delivered unsupported message %T through %v", originID, event, channel)
   195  			return nil
   196  		}
   197  		return fmt.Errorf("unexpected error while processing engine message: %w", err)
   198  	}
   199  	return nil
   200  }
   201  
   202  // process processes events for the synchronization engine.
   203  // Error returns:
   204  //   - IncompatibleInputTypeError if input has unexpected type
   205  //   - All other errors are potential symptoms of internal state corruption or bugs (fatal).
   206  func (e *Engine) process(channel channels.Channel, originID flow.Identifier, event interface{}) error {
   207  	switch message := event.(type) {
   208  	case *messages.BatchRequest:
   209  		err := e.validateBatchRequestForALSP(originID, message)
   210  		if err != nil {
   211  			irrecoverable.Throw(context.TODO(), fmt.Errorf("failed to validate batch request from %x: %w", originID[:], err))
   212  		}
   213  		return e.requestHandler.Process(channel, originID, event)
   214  	case *messages.RangeRequest:
   215  		err := e.validateRangeRequestForALSP(originID, message)
   216  		if err != nil {
   217  			irrecoverable.Throw(context.TODO(), fmt.Errorf("failed to validate range request from %x: %w", originID[:], err))
   218  		}
   219  		return e.requestHandler.Process(channel, originID, event)
   220  
   221  	case *messages.SyncRequest:
   222  		err := e.validateSyncRequestForALSP(originID)
   223  		if err != nil {
   224  			irrecoverable.Throw(context.TODO(), fmt.Errorf("failed to validate sync request from %x: %w", originID[:], err))
   225  		}
   226  		return e.requestHandler.Process(channel, originID, event)
   227  
   228  	case *messages.BlockResponse:
   229  		err := e.validateBlockResponseForALSP(channel, originID, message)
   230  		if err != nil {
   231  			irrecoverable.Throw(context.TODO(), fmt.Errorf("failed to validate block response from %x: %w", originID[:], err))
   232  		}
   233  		return e.responseMessageHandler.Process(originID, event)
   234  
   235  	case *messages.SyncResponse:
   236  		err := e.validateSyncResponseForALSP(channel, originID, message)
   237  		if err != nil {
   238  			irrecoverable.Throw(context.TODO(), fmt.Errorf("failed to validate sync response from %x: %w", originID[:], err))
   239  		}
   240  		return e.responseMessageHandler.Process(originID, event)
   241  	default:
   242  		return fmt.Errorf("received input with type %T from %x: %w", event, originID[:], engine.IncompatibleInputTypeError)
   243  	}
   244  }
   245  
   246  // responseProcessingLoop is a separate goroutine that performs processing of queued responses
   247  func (e *Engine) responseProcessingLoop(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) {
   248  	ready()
   249  
   250  	notifier := e.responseMessageHandler.GetNotifier()
   251  	done := ctx.Done()
   252  	for {
   253  		select {
   254  		case <-done:
   255  			return
   256  		case <-notifier:
   257  			e.processAvailableResponses(ctx)
   258  		}
   259  	}
   260  }
   261  
   262  // processAvailableResponses is processor of pending events which drives events from networking layer to business logic.
   263  func (e *Engine) processAvailableResponses(ctx context.Context) {
   264  	for {
   265  		select {
   266  		case <-ctx.Done():
   267  			return
   268  		default:
   269  		}
   270  
   271  		msg, ok := e.pendingSyncResponses.Get()
   272  		if ok {
   273  			e.onSyncResponse(msg.OriginID, msg.Payload.(*messages.SyncResponse))
   274  			e.metrics.MessageHandled(metrics.EngineSynchronization, metrics.MessageSyncResponse)
   275  			continue
   276  		}
   277  
   278  		msg, ok = e.pendingBlockResponses.Get()
   279  		if ok {
   280  			e.onBlockResponse(msg.OriginID, msg.Payload.(*messages.BlockResponse))
   281  			e.metrics.MessageHandled(metrics.EngineSynchronization, metrics.MessageBlockResponse)
   282  			continue
   283  		}
   284  
   285  		// when there is no more messages in the queue, back to the loop to wait
   286  		// for the next incoming message to arrive.
   287  		return
   288  	}
   289  }
   290  
   291  // onSyncResponse processes a synchronization response.
   292  func (e *Engine) onSyncResponse(originID flow.Identifier, res *messages.SyncResponse) {
   293  	e.log.Debug().Str("origin_id", originID.String()).Msg("received sync response")
   294  	final := e.finalizedHeaderCache.Get()
   295  	e.core.HandleHeight(final, res.Height)
   296  }
   297  
   298  // onBlockResponse processes a response containing a specifically requested block.
   299  func (e *Engine) onBlockResponse(originID flow.Identifier, res *messages.BlockResponse) {
   300  	// process the blocks one by one
   301  	if len(res.Blocks) == 0 {
   302  		e.log.Debug().Msg("received empty block response")
   303  		return
   304  	}
   305  
   306  	first := res.Blocks[0].Header.Height
   307  	last := res.Blocks[len(res.Blocks)-1].Header.Height
   308  	e.log.Debug().Uint64("first", first).Uint64("last", last).Msg("received block response")
   309  
   310  	filteredBlocks := make([]*messages.BlockProposal, 0, len(res.Blocks))
   311  	for _, block := range res.Blocks {
   312  		header := block.Header
   313  		if !e.core.HandleBlock(&header) {
   314  			e.log.Debug().Uint64("height", header.Height).Msg("block handler rejected")
   315  			continue
   316  		}
   317  		filteredBlocks = append(filteredBlocks, &messages.BlockProposal{Block: block})
   318  	}
   319  
   320  	// forward the block to the compliance engine for validation and processing
   321  	e.comp.OnSyncedBlocks(flow.Slashable[[]*messages.BlockProposal]{
   322  		OriginID: originID,
   323  		Message:  filteredBlocks,
   324  	})
   325  }
   326  
   327  // checkLoop will regularly scan for items that need requesting.
   328  func (e *Engine) checkLoop(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) {
   329  	ready()
   330  
   331  	pollChan := make(<-chan time.Time)
   332  	if e.pollInterval > 0 {
   333  		poll := time.NewTicker(e.pollInterval)
   334  		pollChan = poll.C
   335  		defer poll.Stop()
   336  	}
   337  	scan := time.NewTicker(e.scanInterval)
   338  	defer scan.Stop()
   339  
   340  	done := ctx.Done()
   341  	for {
   342  		// give the quit channel a priority to be selected
   343  		select {
   344  		case <-done:
   345  			return
   346  		default:
   347  		}
   348  
   349  		select {
   350  		case <-done:
   351  			return
   352  		case <-pollChan:
   353  			e.pollHeight()
   354  		case <-scan.C:
   355  			final := e.finalizedHeaderCache.Get()
   356  			participants := e.participantsProvider.Identifiers()
   357  			ranges, batches := e.core.ScanPending(final)
   358  			e.sendRequests(participants, ranges, batches)
   359  		}
   360  	}
   361  }
   362  
   363  // pollHeight will send a synchronization request to three random nodes.
   364  func (e *Engine) pollHeight() {
   365  	final := e.finalizedHeaderCache.Get()
   366  	participants := e.participantsProvider.Identifiers()
   367  
   368  	nonce, err := rand.Uint64()
   369  	if err != nil {
   370  		// TODO: this error should be returned by pollHeight()
   371  		// it is logged for now since the only error possible is related to a failure
   372  		// of the system entropy generation. Such error is going to cause failures in other
   373  		// components where it's handled properly and will lead to crashing the module.
   374  		e.log.Warn().Err(err).Msg("nonce generation failed during pollHeight")
   375  		return
   376  	}
   377  
   378  	// send the request for synchronization
   379  	req := &messages.SyncRequest{
   380  		Nonce:  nonce,
   381  		Height: final.Height,
   382  	}
   383  	e.log.Debug().
   384  		Uint64("height", req.Height).
   385  		Uint64("range_nonce", req.Nonce).
   386  		Msg("sending sync request")
   387  	err = e.con.Multicast(req, synccore.DefaultPollNodes, participants...)
   388  	if err != nil {
   389  		e.log.Warn().Err(err).Msg("sending sync request to poll heights failed")
   390  		return
   391  	}
   392  	e.metrics.MessageSent(metrics.EngineSynchronization, metrics.MessageSyncRequest)
   393  }
   394  
   395  // sendRequests sends a request for each range and batch using consensus participants from last finalized snapshot.
   396  func (e *Engine) sendRequests(participants flow.IdentifierList, ranges []chainsync.Range, batches []chainsync.Batch) {
   397  	var errs *multierror.Error
   398  
   399  	for _, ran := range ranges {
   400  		nonce, err := rand.Uint64()
   401  		if err != nil {
   402  			// TODO: this error should be returned by sendRequests
   403  			// it is logged for now since the only error possible is related to a failure
   404  			// of the system entropy generation. Such error is going to cause failures in other
   405  			// components where it's handled properly and will lead to crashing the module.
   406  			e.log.Error().Err(err).Msg("nonce generation failed during range request")
   407  			return
   408  		}
   409  		req := &messages.RangeRequest{
   410  			Nonce:      nonce,
   411  			FromHeight: ran.From,
   412  			ToHeight:   ran.To,
   413  		}
   414  		err = e.con.Multicast(req, synccore.DefaultBlockRequestNodes, participants...)
   415  		if err != nil {
   416  			errs = multierror.Append(errs, fmt.Errorf("could not submit range request: %w", err))
   417  			continue
   418  		}
   419  		e.log.Info().
   420  			Uint64("range_from", req.FromHeight).
   421  			Uint64("range_to", req.ToHeight).
   422  			Uint64("range_nonce", req.Nonce).
   423  			Msg("range requested")
   424  		e.core.RangeRequested(ran)
   425  		e.metrics.MessageSent(metrics.EngineSynchronization, metrics.MessageRangeRequest)
   426  	}
   427  
   428  	for _, batch := range batches {
   429  		nonce, err := rand.Uint64()
   430  		if err != nil {
   431  			// TODO: this error should be returned by sendRequests
   432  			// it is logged for now since the only error possible is related to a failure
   433  			// of the system entropy generation. Such error is going to cause failures in other
   434  			// components where it's handled properly and will lead to crashing the module.
   435  			e.log.Error().Err(err).Msg("nonce generation failed during batch request")
   436  			return
   437  		}
   438  		req := &messages.BatchRequest{
   439  			Nonce:    nonce,
   440  			BlockIDs: batch.BlockIDs,
   441  		}
   442  		err = e.con.Multicast(req, synccore.DefaultBlockRequestNodes, participants...)
   443  		if err != nil {
   444  			errs = multierror.Append(errs, fmt.Errorf("could not submit batch request: %w", err))
   445  			continue
   446  		}
   447  		e.log.Debug().
   448  			Strs("block_ids", flow.IdentifierList(batch.BlockIDs).Strings()).
   449  			Uint64("range_nonce", req.Nonce).
   450  			Msg("batch requested")
   451  		e.core.BatchRequested(batch)
   452  		e.metrics.MessageSent(metrics.EngineSynchronization, metrics.MessageBatchRequest)
   453  	}
   454  
   455  	if err := errs.ErrorOrNil(); err != nil {
   456  		e.log.Warn().Err(err).Msg("sending range and batch requests failed")
   457  	}
   458  }
   459  
   460  // validateBatchRequestForALSP checks if a batch request should be reported as a misbehavior and sends misbehavior report to ALSP.
   461  // The misbehavior is due to either:
   462  //  1. unambiguous malicious or incorrect behavior (0 block IDs) OR
   463  //  2. large number of block IDs in batch request. This is more ambiguous to detect as malicious behavior because there is no way to know for sure
   464  //     if the sender is sending a large batch request maliciously or not, so we use a probabilistic approach to report the misbehavior.
   465  //
   466  // Args:
   467  // - originID: the sender of the batch request
   468  // - batchRequest: the batch request to validate
   469  // Returns:
   470  // - error: If an error is encountered while validating the batch request. Error is assumed to be irrecoverable because of internal processes that didn't allow validation to complete.
   471  func (e *Engine) validateBatchRequestForALSP(originID flow.Identifier, batchRequest *messages.BatchRequest) error {
   472  	// Generate a random integer between 0 and spamProbabilityMultiplier (exclusive)
   473  	n, err := rand.Uint32n(spamProbabilityMultiplier)
   474  	if err != nil {
   475  		return fmt.Errorf("failed to generate random number from %x: %w", originID[:], err)
   476  	}
   477  
   478  	// validity check: if no block IDs, always report as misbehavior
   479  	if len(batchRequest.BlockIDs) == 0 {
   480  		e.log.Warn().
   481  			Hex("origin_id", logging.ID(originID)).
   482  			Str(logging.KeySuspicious, "true").
   483  			Str("reason", alsp.InvalidMessage.String()).
   484  			Msg("received invalid batch request with 0 block IDs, creating ALSP report")
   485  		report, err := alsp.NewMisbehaviorReport(originID, alsp.InvalidMessage)
   486  		if err != nil {
   487  			// failing to create the misbehavior report is unlikely. If an error is encountered while
   488  			// creating the misbehavior report it indicates a bug and processing can not proceed.
   489  			return fmt.Errorf("failed to create misbehavior report (invalid batch request, no block IDs) from %x: %w", originID[:], err)
   490  		}
   491  		// failed unambiguous validation check and should be reported as misbehavior
   492  		e.con.ReportMisbehavior(report)
   493  		return nil
   494  	}
   495  
   496  	// to avoid creating a misbehavior report for every batch request received, use a probabilistic approach.
   497  	// The larger the batch request and base probability, the higher the probability of creating a misbehavior report.
   498  
   499  	// batchRequestProb is calculated as follows:
   500  	// batchRequestBaseProb * (len(batchRequest.BlockIDs) + 1) / synccore.DefaultConfig().MaxSize
   501  	// Example 1 (small batch of block IDs) if the batch request is for 10 blocks IDs and batchRequestBaseProb is 0.01, then the probability of
   502  	// creating a misbehavior report is:
   503  	// batchRequestBaseProb * (10+1) / synccore.DefaultConfig().MaxSize
   504  	// = 0.01 * 11 / 64 = 0.00171875 = 0.171875%
   505  	// Example 2 (large batch of block IDs) if the batch request is for 1000 block IDs and batchRequestBaseProb is 0.01, then the probability of
   506  	// creating a misbehavior report is:
   507  	// batchRequestBaseProb * (1000+1) / synccore.DefaultConfig().MaxSize
   508  	// = 0.01 * 1001 / 64 = 0.15640625 = 15.640625%
   509  	batchRequestProb := e.spamDetectionConfig.batchRequestBaseProb * (float32(len(batchRequest.BlockIDs)) + 1) / float32(synccore.DefaultConfig().MaxSize)
   510  	if float32(n) < batchRequestProb*spamProbabilityMultiplier {
   511  		// create a misbehavior report
   512  		e.log.Debug().
   513  			Hex("origin_id", logging.ID(originID)).
   514  			Str(logging.KeyLoad, "true").
   515  			Str("reason", alsp.ResourceIntensiveRequest.String()).
   516  			Msgf("for %d block IDs, creating probabilistic ALSP report", len(batchRequest.BlockIDs))
   517  		report, err := alsp.NewMisbehaviorReport(originID, alsp.ResourceIntensiveRequest)
   518  		if err != nil {
   519  			// failing to create the misbehavior report is unlikely. If an error is encountered while
   520  			// creating the misbehavior report it indicates a bug and processing can not proceed.
   521  			return fmt.Errorf("failed to create misbehavior report from %x: %w", originID[:], err)
   522  		}
   523  		// failed probabilistic (load) validation check and should be reported as misbehavior
   524  		e.con.ReportMisbehavior(report)
   525  		return nil
   526  	}
   527  	return nil
   528  }
   529  
// validateBlockResponseForALSP is a placeholder for ALSP validation of block responses.
// It currently performs no checks, sends no misbehavior report and always returns nil.
//
// TODO: implement spam reporting similar to validateSyncRequestForALSP
func (e *Engine) validateBlockResponseForALSP(channel channels.Channel, id flow.Identifier, blockResponse *messages.BlockResponse) error {
	return nil
}
   534  
   535  // validateRangeRequestForALSP checks if a range request should be reported as a misbehavior and sends misbehavior report to ALSP.
   536  // The misbehavior is due to either:
   537  //  1. unambiguous malicious or incorrect behavior (toHeight < fromHeight) OR
   538  //  2. large height in range request. This is more ambiguous to detect as malicious behavior because there is no way to know for sure
   539  //     if the sender is sending a large range request height maliciously or not, so we use a probabilistic approach to report the misbehavior.
   540  //
   541  // Args:
   542  // - originID: the sender of the range request
   543  // - rangeRequest: the range request to validate
   544  // Returns:
   545  // - error: If an error is encountered while validating the range request. Error is assumed to be irrecoverable because of internal processes that didn't allow validation to complete.
   546  func (e *Engine) validateRangeRequestForALSP(originID flow.Identifier, rangeRequest *messages.RangeRequest) error {
   547  	// Generate a random integer between 0 and spamProbabilityMultiplier (exclusive)
   548  	n, err := rand.Uint32n(spamProbabilityMultiplier)
   549  	if err != nil {
   550  		return fmt.Errorf("failed to generate random number from %x: %w", originID[:], err)
   551  	}
   552  
   553  	// check if range request is valid
   554  	if rangeRequest.ToHeight < rangeRequest.FromHeight {
   555  		e.log.Warn().
   556  			Hex("origin_id", logging.ID(originID)).
   557  			Str(logging.KeySuspicious, "true").
   558  			Str("reason", alsp.InvalidMessage.String()).
   559  			Msgf("received invalid range request from height %d is not less than the to height %d, creating ALSP report", rangeRequest.FromHeight, rangeRequest.ToHeight)
   560  		report, err := alsp.NewMisbehaviorReport(originID, alsp.InvalidMessage)
   561  		if err != nil {
   562  			// failing to create the misbehavior report is unlikely. If an error is encountered while
   563  			// creating the misbehavior report it indicates a bug and processing can not proceed.
   564  			return fmt.Errorf("failed to create misbehavior report (invalid range request) from %x: %w", originID[:], err)
   565  		}
   566  		// failed unambiguous validation check and should be reported as misbehavior
   567  		e.con.ReportMisbehavior(report)
   568  		return nil
   569  	}
   570  
   571  	// to avoid creating a misbehavior report for every range request received, use a probabilistic approach.
   572  	// The higher the range request and base probability, the higher the probability of creating a misbehavior report.
   573  
   574  	// rangeRequestProb is calculated as follows:
   575  	// rangeRequestBaseProb * ((rangeRequest.ToHeight-rangeRequest.FromHeight) + 1) / synccore.DefaultConfig().MaxSize
   576  	// Example 1 (small range) if the range request is for 10 blocks and rangeRequestBaseProb is 0.01, then the probability of
   577  	// creating a misbehavior report is:
   578  	// rangeRequestBaseProb * (10+1) / synccore.DefaultConfig().MaxSize
   579  	// = 0.01 * 11 / 64 = 0.00171875 = 0.171875%
   580  	// Example 2 (large range) if the range request is for 1000 blocks and rangeRequestBaseProb is 0.01, then the probability of
   581  	// creating a misbehavior report is:
   582  	// rangeRequestBaseProb * (1000+1) / synccore.DefaultConfig().MaxSize
   583  	// = 0.01 * 1001 / 64 = 0.15640625 = 15.640625%
   584  	rangeRequestProb := e.spamDetectionConfig.rangeRequestBaseProb * (float32(rangeRequest.ToHeight-rangeRequest.FromHeight) + 1) / float32(synccore.DefaultConfig().MaxSize)
   585  	if float32(n) < rangeRequestProb*spamProbabilityMultiplier {
   586  		// create a misbehavior report
   587  		e.log.Debug().
   588  			Hex("origin_id", logging.ID(originID)).
   589  			Str(logging.KeyLoad, "true").
   590  			Str("reason", alsp.ResourceIntensiveRequest.String()).
   591  			Msgf("from height %d to height %d, creating probabilistic ALSP report", rangeRequest.FromHeight, rangeRequest.ToHeight)
   592  		report, err := alsp.NewMisbehaviorReport(originID, alsp.ResourceIntensiveRequest)
   593  		if err != nil {
   594  			// failing to create the misbehavior report is unlikely. If an error is encountered while
   595  			// creating the misbehavior report it indicates a bug and processing can not proceed.
   596  			return fmt.Errorf("failed to create misbehavior report from %x: %w", originID[:], err)
   597  		}
   598  		// failed validation check and should be reported as misbehavior
   599  
   600  		// failed probabilistic (load) validation check and should be reported as misbehavior
   601  		e.con.ReportMisbehavior(report)
   602  		return nil
   603  	}
   604  
   605  	// passed all validation checks with no misbehavior detected
   606  	return nil
   607  }
   608  
   609  // validateSyncRequestForALSP checks if a sync request should be reported as a misbehavior and sends misbehavior report to ALSP.
   610  // The misbehavior is ambiguous to detect as malicious behavior because there is no way to know for sure if the sender is sending
   611  // a sync request maliciously or not, so we use a probabilistic approach to report the misbehavior.
   612  //
   613  // Args:
   614  // - originID: the sender of the sync request
   615  // Returns:
   616  // - error: If an error is encountered while validating the sync request. Error is assumed to be irrecoverable because of internal processes that didn't allow validation to complete.
   617  func (e *Engine) validateSyncRequestForALSP(originID flow.Identifier) error {
   618  	// Generate a random integer between 0 and spamProbabilityMultiplier (exclusive)
   619  	n, err := rand.Uint32n(spamProbabilityMultiplier)
   620  	if err != nil {
   621  		return fmt.Errorf("failed to generate random number from %x: %w", originID[:], err)
   622  	}
   623  
   624  	// to avoid creating a misbehavior report for every sync request received, use a probabilistic approach.
   625  	// Create a report with a probability of spamDetectionConfig.syncRequestProb
   626  	if float32(n) < e.spamDetectionConfig.syncRequestProb*spamProbabilityMultiplier {
   627  
   628  		// create misbehavior report
   629  		e.log.Debug().
   630  			Hex("origin_id", logging.ID(originID)).
   631  			Str(logging.KeyLoad, "true").
   632  			Str("reason", alsp.ResourceIntensiveRequest.String()).
   633  			Msg("creating probabilistic ALSP report")
   634  
   635  		report, err := alsp.NewMisbehaviorReport(originID, alsp.ResourceIntensiveRequest)
   636  		if err != nil {
   637  			// failing to create the misbehavior report is unlikely. If an error is encountered while
   638  			// creating the misbehavior report it indicates a bug and processing can not proceed.
   639  			return fmt.Errorf("failed to create misbehavior report from %x: %w", originID[:], err)
   640  		}
   641  		e.con.ReportMisbehavior(report)
   642  		return nil
   643  	}
   644  
   645  	// passed all validation checks with no misbehavior detected
   646  	return nil
   647  }
   648  
// validateSyncResponseForALSP is a placeholder for ALSP validation of sync responses.
// It currently performs no checks, sends no misbehavior report and always returns nil.
//
// TODO: implement spam reporting similar to validateSyncRequestForALSP
func (e *Engine) validateSyncResponseForALSP(channel channels.Channel, id flow.Identifier, syncResponse *messages.SyncResponse) error {
	return nil
}