github.com/onflow/flow-go@v0.33.17/engine/common/synchronization/engine.go

github.com/onflow/flow-go@v0.33.17/engine/common/synchronization/engine.go (about)

     1  // (c) 2019 Dapper Labs - ALL RIGHTS RESERVED
     2  
     3  package synchronization
     4  
     5  import (
     6  	"context"
     7  	"fmt"
     8  	"time"
     9  
    10  	"github.com/hashicorp/go-multierror"
    11  	"github.com/rs/zerolog"
    12  
    13  	"github.com/onflow/flow-go/consensus/hotstuff"
    14  	"github.com/onflow/flow-go/engine"
    15  	"github.com/onflow/flow-go/engine/common/fifoqueue"
    16  	"github.com/onflow/flow-go/engine/consensus"
    17  	"github.com/onflow/flow-go/model/chainsync"
    18  	"github.com/onflow/flow-go/model/flow"
    19  	"github.com/onflow/flow-go/model/messages"
    20  	"github.com/onflow/flow-go/module"
    21  	synccore "github.com/onflow/flow-go/module/chainsync"
    22  	"github.com/onflow/flow-go/module/component"
    23  	"github.com/onflow/flow-go/module/events"
    24  	"github.com/onflow/flow-go/module/irrecoverable"
    25  	"github.com/onflow/flow-go/module/metrics"
    26  	"github.com/onflow/flow-go/network"
    27  	"github.com/onflow/flow-go/network/alsp"
    28  	"github.com/onflow/flow-go/network/channels"
    29  	"github.com/onflow/flow-go/state/protocol"
    30  	"github.com/onflow/flow-go/storage"
    31  	"github.com/onflow/flow-go/utils/logging"
    32  	"github.com/onflow/flow-go/utils/rand"
    33  )
    34  
    35  // defaultSyncResponseQueueCapacity maximum capacity of sync responses queue
    36  const defaultSyncResponseQueueCapacity = 500
    37  
    38  // defaultBlockResponseQueueCapacity maximum capacity of block responses queue
    39  const defaultBlockResponseQueueCapacity = 500
    40  
// Engine is the synchronization engine, responsible for synchronizing chain state.
//
// It embeds a component.Component (built from its worker routines in New) and a
// hotstuff.FinalizationConsumer (backed by the finalized header cache created in New).
// Incoming requests are delegated to the request handler, while incoming responses
// are queued and processed asynchronously by the response processing loop.
type Engine struct {
	component.Component
	hotstuff.FinalizationConsumer

	// Shared dependencies: logging, metrics, access to the finalized header,
	// the network conduit (used for Multicast and ReportMisbehavior), block storage
	// and the compliance layer that receives synced blocks via OnSyncedBlocks.
	log                  zerolog.Logger
	metrics              module.EngineMetrics
	me                   module.Local
	finalizedHeaderCache module.FinalizedHeaderCache
	con                  network.Conduit
	blocks               storage.Blocks
	comp                 consensus.Compliance

	// Scheduling of outbound requests: pollInterval drives pollHeight (polling is
	// skipped when it is not positive), scanInterval drives scanning of the sync core
	// for pending ranges/batches. participantsProvider yields the request recipients.
	pollInterval         time.Duration
	scanInterval         time.Duration
	core                 module.SyncCore
	participantsProvider module.IdentifierProvider

	requestHandler      *RequestHandler // component responsible for handling requests
	spamDetectionConfig *SpamDetectionConfig

	pendingSyncResponses   engine.MessageStore    // message store for *messages.SyncResponse
	pendingBlockResponses  engine.MessageStore    // message store for *messages.BlockResponse
	responseMessageHandler *engine.MessageHandler // message handler responsible for response processing
}
    66  
    67  var _ network.MessageProcessor = (*Engine)(nil)
    68  var _ component.Component = (*Engine)(nil)
    69  
    70  // New creates a new main chain synchronization engine.
    71  func New(
    72  	log zerolog.Logger,
    73  	metrics module.EngineMetrics,
    74  	net network.EngineRegistry,
    75  	me module.Local,
    76  	state protocol.State,
    77  	blocks storage.Blocks,
    78  	comp consensus.Compliance,
    79  	core module.SyncCore,
    80  	participantsProvider module.IdentifierProvider,
    81  	spamDetectionConfig *SpamDetectionConfig,
    82  	opts ...OptionFunc,
    83  ) (*Engine, error) {
    84  
    85  	opt := DefaultConfig()
    86  	for _, f := range opts {
    87  		f(opt)
    88  	}
    89  
    90  	if comp == nil {
    91  		panic("must initialize synchronization engine with comp engine")
    92  	}
    93  
    94  	finalizedHeaderCache, finalizedCacheWorker, err := events.NewFinalizedHeaderCache(state)
    95  	if err != nil {
    96  		return nil, fmt.Errorf("could not create finalized header cache: %w", err)
    97  	}
    98  
    99  	// initialize the propagation engine with its dependencies
   100  	e := &Engine{
   101  		FinalizationConsumer: finalizedHeaderCache,
   102  		log:                  log.With().Str("engine", "synchronization").Logger(),
   103  		metrics:              metrics,
   104  		me:                   me,
   105  		finalizedHeaderCache: finalizedHeaderCache,
   106  		blocks:               blocks,
   107  		comp:                 comp,
   108  		core:                 core,
   109  		pollInterval:         opt.PollInterval,
   110  		scanInterval:         opt.ScanInterval,
   111  		participantsProvider: participantsProvider,
   112  		spamDetectionConfig:  spamDetectionConfig,
   113  	}
   114  
   115  	// register the engine with the network layer and store the conduit
   116  	con, err := net.Register(channels.SyncCommittee, e)
   117  	if err != nil {
   118  		return nil, fmt.Errorf("could not register engine: %w", err)
   119  	}
   120  	e.con = con
   121  	e.requestHandler = NewRequestHandler(log, metrics, NewResponseSender(con), me, finalizedHeaderCache, blocks, core, true)
   122  
   123  	// set up worker routines
   124  	builder := component.NewComponentManagerBuilder().
   125  		AddWorker(finalizedCacheWorker).
   126  		AddWorker(e.checkLoop).
   127  		AddWorker(e.responseProcessingLoop)
   128  	for i := 0; i < defaultEngineRequestsWorkers; i++ {
   129  		builder.AddWorker(e.requestHandler.requestProcessingWorker)
   130  	}
   131  	e.Component = builder.Build()
   132  
   133  	err = e.setupResponseMessageHandler()
   134  	if err != nil {
   135  		return nil, fmt.Errorf("could not setup message handler")
   136  	}
   137  
   138  	return e, nil
   139  }
   140  
   141  // setupResponseMessageHandler initializes the inbound queues and the MessageHandler for UNTRUSTED responses.
   142  func (e *Engine) setupResponseMessageHandler() error {
   143  	syncResponseQueue, err := fifoqueue.NewFifoQueue(defaultSyncResponseQueueCapacity)
   144  	if err != nil {
   145  		return fmt.Errorf("failed to create queue for sync responses: %w", err)
   146  	}
   147  
   148  	e.pendingSyncResponses = &engine.FifoMessageStore{
   149  		FifoQueue: syncResponseQueue,
   150  	}
   151  
   152  	blockResponseQueue, err := fifoqueue.NewFifoQueue(defaultBlockResponseQueueCapacity)
   153  	if err != nil {
   154  		return fmt.Errorf("failed to create queue for block responses: %w", err)
   155  	}
   156  
   157  	e.pendingBlockResponses = &engine.FifoMessageStore{
   158  		FifoQueue: blockResponseQueue,
   159  	}
   160  
   161  	// define message queueing behaviour
   162  	e.responseMessageHandler = engine.NewMessageHandler(
   163  		e.log,
   164  		engine.NewNotifier(),
   165  		engine.Pattern{
   166  			Match: func(msg *engine.Message) bool {
   167  				_, ok := msg.Payload.(*messages.SyncResponse)
   168  				if ok {
   169  					e.metrics.MessageReceived(metrics.EngineSynchronization, metrics.MessageSyncResponse)
   170  				}
   171  				return ok
   172  			},
   173  			Store: e.pendingSyncResponses,
   174  		},
   175  		engine.Pattern{
   176  			Match: func(msg *engine.Message) bool {
   177  				_, ok := msg.Payload.(*messages.BlockResponse)
   178  				if ok {
   179  					e.metrics.MessageReceived(metrics.EngineSynchronization, metrics.MessageBlockResponse)
   180  				}
   181  				return ok
   182  			},
   183  			Store: e.pendingBlockResponses,
   184  		},
   185  	)
   186  
   187  	return nil
   188  }
   189  
   190  // Process processes the given event from the node with the given origin ID in
   191  // a blocking manner. It returns the potential processing error when done.
   192  func (e *Engine) Process(channel channels.Channel, originID flow.Identifier, event interface{}) error {
   193  	err := e.process(channel, originID, event)
   194  	if err != nil {
   195  		if engine.IsIncompatibleInputTypeError(err) {
   196  			e.log.Warn().Msgf("%v delivered unsupported message %T through %v", originID, event, channel)
   197  			return nil
   198  		}
   199  		return fmt.Errorf("unexpected error while processing engine message: %w", err)
   200  	}
   201  	return nil
   202  }
   203  
   204  // process processes events for the synchronization engine.
   205  // Error returns:
   206  //   - IncompatibleInputTypeError if input has unexpected type
   207  //   - All other errors are potential symptoms of internal state corruption or bugs (fatal).
   208  func (e *Engine) process(channel channels.Channel, originID flow.Identifier, event interface{}) error {
   209  	switch message := event.(type) {
   210  	case *messages.BatchRequest:
   211  		err := e.validateBatchRequestForALSP(originID, message)
   212  		if err != nil {
   213  			irrecoverable.Throw(context.TODO(), fmt.Errorf("failed to validate batch request from %x: %w", originID[:], err))
   214  		}
   215  		return e.requestHandler.Process(channel, originID, event)
   216  	case *messages.RangeRequest:
   217  		err := e.validateRangeRequestForALSP(originID, message)
   218  		if err != nil {
   219  			irrecoverable.Throw(context.TODO(), fmt.Errorf("failed to validate range request from %x: %w", originID[:], err))
   220  		}
   221  		return e.requestHandler.Process(channel, originID, event)
   222  
   223  	case *messages.SyncRequest:
   224  		err := e.validateSyncRequestForALSP(originID)
   225  		if err != nil {
   226  			irrecoverable.Throw(context.TODO(), fmt.Errorf("failed to validate sync request from %x: %w", originID[:], err))
   227  		}
   228  		return e.requestHandler.Process(channel, originID, event)
   229  
   230  	case *messages.BlockResponse:
   231  		err := e.validateBlockResponseForALSP(channel, originID, message)
   232  		if err != nil {
   233  			irrecoverable.Throw(context.TODO(), fmt.Errorf("failed to validate block response from %x: %w", originID[:], err))
   234  		}
   235  		return e.responseMessageHandler.Process(originID, event)
   236  
   237  	case *messages.SyncResponse:
   238  		err := e.validateSyncResponseForALSP(channel, originID, message)
   239  		if err != nil {
   240  			irrecoverable.Throw(context.TODO(), fmt.Errorf("failed to validate sync response from %x: %w", originID[:], err))
   241  		}
   242  		return e.responseMessageHandler.Process(originID, event)
   243  	default:
   244  		return fmt.Errorf("received input with type %T from %x: %w", event, originID[:], engine.IncompatibleInputTypeError)
   245  	}
   246  }
   247  
   248  // responseProcessingLoop is a separate goroutine that performs processing of queued responses
   249  func (e *Engine) responseProcessingLoop(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) {
   250  	ready()
   251  
   252  	notifier := e.responseMessageHandler.GetNotifier()
   253  	done := ctx.Done()
   254  	for {
   255  		select {
   256  		case <-done:
   257  			return
   258  		case <-notifier:
   259  			e.processAvailableResponses(ctx)
   260  		}
   261  	}
   262  }
   263  
   264  // processAvailableResponses is processor of pending events which drives events from networking layer to business logic.
   265  func (e *Engine) processAvailableResponses(ctx context.Context) {
   266  	for {
   267  		select {
   268  		case <-ctx.Done():
   269  			return
   270  		default:
   271  		}
   272  
   273  		msg, ok := e.pendingSyncResponses.Get()
   274  		if ok {
   275  			e.onSyncResponse(msg.OriginID, msg.Payload.(*messages.SyncResponse))
   276  			e.metrics.MessageHandled(metrics.EngineSynchronization, metrics.MessageSyncResponse)
   277  			continue
   278  		}
   279  
   280  		msg, ok = e.pendingBlockResponses.Get()
   281  		if ok {
   282  			e.onBlockResponse(msg.OriginID, msg.Payload.(*messages.BlockResponse))
   283  			e.metrics.MessageHandled(metrics.EngineSynchronization, metrics.MessageBlockResponse)
   284  			continue
   285  		}
   286  
   287  		// when there is no more messages in the queue, back to the loop to wait
   288  		// for the next incoming message to arrive.
   289  		return
   290  	}
   291  }
   292  
   293  // onSyncResponse processes a synchronization response.
   294  func (e *Engine) onSyncResponse(originID flow.Identifier, res *messages.SyncResponse) {
   295  	e.log.Debug().Str("origin_id", originID.String()).Msg("received sync response")
   296  	final := e.finalizedHeaderCache.Get()
   297  	e.core.HandleHeight(final, res.Height)
   298  }
   299  
   300  // onBlockResponse processes a response containing a specifically requested block.
   301  func (e *Engine) onBlockResponse(originID flow.Identifier, res *messages.BlockResponse) {
   302  	// process the blocks one by one
   303  	if len(res.Blocks) == 0 {
   304  		e.log.Debug().Msg("received empty block response")
   305  		return
   306  	}
   307  
   308  	first := res.Blocks[0].Header.Height
   309  	last := res.Blocks[len(res.Blocks)-1].Header.Height
   310  	e.log.Debug().Uint64("first", first).Uint64("last", last).Msg("received block response")
   311  
   312  	filteredBlocks := make([]*messages.BlockProposal, 0, len(res.Blocks))
   313  	for _, block := range res.Blocks {
   314  		header := block.Header
   315  		if !e.core.HandleBlock(&header) {
   316  			e.log.Debug().Uint64("height", header.Height).Msg("block handler rejected")
   317  			continue
   318  		}
   319  		filteredBlocks = append(filteredBlocks, &messages.BlockProposal{Block: block})
   320  	}
   321  
   322  	// forward the block to the compliance engine for validation and processing
   323  	e.comp.OnSyncedBlocks(flow.Slashable[[]*messages.BlockProposal]{
   324  		OriginID: originID,
   325  		Message:  filteredBlocks,
   326  	})
   327  }
   328  
   329  // checkLoop will regularly scan for items that need requesting.
   330  func (e *Engine) checkLoop(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) {
   331  	ready()
   332  
   333  	pollChan := make(<-chan time.Time)
   334  	if e.pollInterval > 0 {
   335  		poll := time.NewTicker(e.pollInterval)
   336  		pollChan = poll.C
   337  		defer poll.Stop()
   338  	}
   339  	scan := time.NewTicker(e.scanInterval)
   340  	defer scan.Stop()
   341  
   342  	done := ctx.Done()
   343  	for {
   344  		// give the quit channel a priority to be selected
   345  		select {
   346  		case <-done:
   347  			return
   348  		default:
   349  		}
   350  
   351  		select {
   352  		case <-done:
   353  			return
   354  		case <-pollChan:
   355  			e.pollHeight()
   356  		case <-scan.C:
   357  			final := e.finalizedHeaderCache.Get()
   358  			participants := e.participantsProvider.Identifiers()
   359  			ranges, batches := e.core.ScanPending(final)
   360  			e.sendRequests(participants, ranges, batches)
   361  		}
   362  	}
   363  }
   364  
   365  // pollHeight will send a synchronization request to three random nodes.
   366  func (e *Engine) pollHeight() {
   367  	final := e.finalizedHeaderCache.Get()
   368  	participants := e.participantsProvider.Identifiers()
   369  
   370  	nonce, err := rand.Uint64()
   371  	if err != nil {
   372  		// TODO: this error should be returned by pollHeight()
   373  		// it is logged for now since the only error possible is related to a failure
   374  		// of the system entropy generation. Such error is going to cause failures in other
   375  		// components where it's handled properly and will lead to crashing the module.
   376  		e.log.Warn().Err(err).Msg("nonce generation failed during pollHeight")
   377  		return
   378  	}
   379  
   380  	// send the request for synchronization
   381  	req := &messages.SyncRequest{
   382  		Nonce:  nonce,
   383  		Height: final.Height,
   384  	}
   385  	e.log.Debug().
   386  		Uint64("height", req.Height).
   387  		Uint64("range_nonce", req.Nonce).
   388  		Msg("sending sync request")
   389  	err = e.con.Multicast(req, synccore.DefaultPollNodes, participants...)
   390  	if err != nil {
   391  		e.log.Warn().Err(err).Msg("sending sync request to poll heights failed")
   392  		return
   393  	}
   394  	e.metrics.MessageSent(metrics.EngineSynchronization, metrics.MessageSyncRequest)
   395  }
   396  
   397  // sendRequests sends a request for each range and batch using consensus participants from last finalized snapshot.
   398  func (e *Engine) sendRequests(participants flow.IdentifierList, ranges []chainsync.Range, batches []chainsync.Batch) {
   399  	var errs *multierror.Error
   400  
   401  	for _, ran := range ranges {
   402  		nonce, err := rand.Uint64()
   403  		if err != nil {
   404  			// TODO: this error should be returned by sendRequests
   405  			// it is logged for now since the only error possible is related to a failure
   406  			// of the system entropy generation. Such error is going to cause failures in other
   407  			// components where it's handled properly and will lead to crashing the module.
   408  			e.log.Error().Err(err).Msg("nonce generation failed during range request")
   409  			return
   410  		}
   411  		req := &messages.RangeRequest{
   412  			Nonce:      nonce,
   413  			FromHeight: ran.From,
   414  			ToHeight:   ran.To,
   415  		}
   416  		err = e.con.Multicast(req, synccore.DefaultBlockRequestNodes, participants...)
   417  		if err != nil {
   418  			errs = multierror.Append(errs, fmt.Errorf("could not submit range request: %w", err))
   419  			continue
   420  		}
   421  		e.log.Info().
   422  			Uint64("range_from", req.FromHeight).
   423  			Uint64("range_to", req.ToHeight).
   424  			Uint64("range_nonce", req.Nonce).
   425  			Msg("range requested")
   426  		e.core.RangeRequested(ran)
   427  		e.metrics.MessageSent(metrics.EngineSynchronization, metrics.MessageRangeRequest)
   428  	}
   429  
   430  	for _, batch := range batches {
   431  		nonce, err := rand.Uint64()
   432  		if err != nil {
   433  			// TODO: this error should be returned by sendRequests
   434  			// it is logged for now since the only error possible is related to a failure
   435  			// of the system entropy generation. Such error is going to cause failures in other
   436  			// components where it's handled properly and will lead to crashing the module.
   437  			e.log.Error().Err(err).Msg("nonce generation failed during batch request")
   438  			return
   439  		}
   440  		req := &messages.BatchRequest{
   441  			Nonce:    nonce,
   442  			BlockIDs: batch.BlockIDs,
   443  		}
   444  		err = e.con.Multicast(req, synccore.DefaultBlockRequestNodes, participants...)
   445  		if err != nil {
   446  			errs = multierror.Append(errs, fmt.Errorf("could not submit batch request: %w", err))
   447  			continue
   448  		}
   449  		e.log.Debug().
   450  			Strs("block_ids", flow.IdentifierList(batch.BlockIDs).Strings()).
   451  			Uint64("range_nonce", req.Nonce).
   452  			Msg("batch requested")
   453  		e.core.BatchRequested(batch)
   454  		e.metrics.MessageSent(metrics.EngineSynchronization, metrics.MessageBatchRequest)
   455  	}
   456  
   457  	if err := errs.ErrorOrNil(); err != nil {
   458  		e.log.Warn().Err(err).Msg("sending range and batch requests failed")
   459  	}
   460  }
   461  
   462  // validateBatchRequestForALSP checks if a batch request should be reported as a misbehavior and sends misbehavior report to ALSP.
   463  // The misbehavior is due to either:
   464  //  1. unambiguous malicious or incorrect behavior (0 block IDs) OR
   465  //  2. large number of block IDs in batch request. This is more ambiguous to detect as malicious behavior because there is no way to know for sure
   466  //     if the sender is sending a large batch request maliciously or not, so we use a probabilistic approach to report the misbehavior.
   467  //
   468  // Args:
   469  // - originID: the sender of the batch request
   470  // - batchRequest: the batch request to validate
   471  // Returns:
   472  // - error: If an error is encountered while validating the batch request. Error is assumed to be irrecoverable because of internal processes that didn't allow validation to complete.
   473  func (e *Engine) validateBatchRequestForALSP(originID flow.Identifier, batchRequest *messages.BatchRequest) error {
   474  	// Generate a random integer between 0 and spamProbabilityMultiplier (exclusive)
   475  	n, err := rand.Uint32n(spamProbabilityMultiplier)
   476  	if err != nil {
   477  		return fmt.Errorf("failed to generate random number from %x: %w", originID[:], err)
   478  	}
   479  
   480  	// validity check: if no block IDs, always report as misbehavior
   481  	if len(batchRequest.BlockIDs) == 0 {
   482  		e.log.Warn().
   483  			Hex("origin_id", logging.ID(originID)).
   484  			Str(logging.KeySuspicious, "true").
   485  			Str("reason", alsp.InvalidMessage.String()).
   486  			Msg("received invalid batch request with 0 block IDs, creating ALSP report")
   487  		report, err := alsp.NewMisbehaviorReport(originID, alsp.InvalidMessage)
   488  		if err != nil {
   489  			// failing to create the misbehavior report is unlikely. If an error is encountered while
   490  			// creating the misbehavior report it indicates a bug and processing can not proceed.
   491  			return fmt.Errorf("failed to create misbehavior report (invalid batch request, no block IDs) from %x: %w", originID[:], err)
   492  		}
   493  		// failed unambiguous validation check and should be reported as misbehavior
   494  		e.con.ReportMisbehavior(report)
   495  		return nil
   496  	}
   497  
   498  	// to avoid creating a misbehavior report for every batch request received, use a probabilistic approach.
   499  	// The larger the batch request and base probability, the higher the probability of creating a misbehavior report.
   500  
   501  	// batchRequestProb is calculated as follows:
   502  	// batchRequestBaseProb * (len(batchRequest.BlockIDs) + 1) / synccore.DefaultConfig().MaxSize
   503  	// Example 1 (small batch of block IDs) if the batch request is for 10 blocks IDs and batchRequestBaseProb is 0.01, then the probability of
   504  	// creating a misbehavior report is:
   505  	// batchRequestBaseProb * (10+1) / synccore.DefaultConfig().MaxSize
   506  	// = 0.01 * 11 / 64 = 0.00171875 = 0.171875%
   507  	// Example 2 (large batch of block IDs) if the batch request is for 1000 block IDs and batchRequestBaseProb is 0.01, then the probability of
   508  	// creating a misbehavior report is:
   509  	// batchRequestBaseProb * (1000+1) / synccore.DefaultConfig().MaxSize
   510  	// = 0.01 * 1001 / 64 = 0.15640625 = 15.640625%
   511  	batchRequestProb := e.spamDetectionConfig.batchRequestBaseProb * (float32(len(batchRequest.BlockIDs)) + 1) / float32(synccore.DefaultConfig().MaxSize)
   512  	if float32(n) < batchRequestProb*spamProbabilityMultiplier {
   513  		// create a misbehavior report
   514  		e.log.Debug().
   515  			Hex("origin_id", logging.ID(originID)).
   516  			Str(logging.KeyLoad, "true").
   517  			Str("reason", alsp.ResourceIntensiveRequest.String()).
   518  			Msgf("for %d block IDs, creating probabilistic ALSP report", len(batchRequest.BlockIDs))
   519  		report, err := alsp.NewMisbehaviorReport(originID, alsp.ResourceIntensiveRequest)
   520  		if err != nil {
   521  			// failing to create the misbehavior report is unlikely. If an error is encountered while
   522  			// creating the misbehavior report it indicates a bug and processing can not proceed.
   523  			return fmt.Errorf("failed to create misbehavior report from %x: %w", originID[:], err)
   524  		}
   525  		// failed probabilistic (load) validation check and should be reported as misbehavior
   526  		e.con.ReportMisbehavior(report)
   527  		return nil
   528  	}
   529  	return nil
   530  }
   531  
// validateBlockResponseForALSP is a placeholder for ALSP validation of block responses.
// It currently performs no checks, ignores all of its arguments and always returns nil.
//
// TODO: implement spam reporting similar to validateSyncRequestForALSP
func (e *Engine) validateBlockResponseForALSP(channel channels.Channel, id flow.Identifier, blockResponse *messages.BlockResponse) error {
	return nil
}
   536  
   537  // validateRangeRequestForALSP checks if a range request should be reported as a misbehavior and sends misbehavior report to ALSP.
   538  // The misbehavior is due to either:
   539  //  1. unambiguous malicious or incorrect behavior (toHeight < fromHeight) OR
   540  //  2. large height in range request. This is more ambiguous to detect as malicious behavior because there is no way to know for sure
   541  //     if the sender is sending a large range request height maliciously or not, so we use a probabilistic approach to report the misbehavior.
   542  //
   543  // Args:
   544  // - originID: the sender of the range request
   545  // - rangeRequest: the range request to validate
   546  // Returns:
   547  // - error: If an error is encountered while validating the range request. Error is assumed to be irrecoverable because of internal processes that didn't allow validation to complete.
   548  func (e *Engine) validateRangeRequestForALSP(originID flow.Identifier, rangeRequest *messages.RangeRequest) error {
   549  	// Generate a random integer between 0 and spamProbabilityMultiplier (exclusive)
   550  	n, err := rand.Uint32n(spamProbabilityMultiplier)
   551  	if err != nil {
   552  		return fmt.Errorf("failed to generate random number from %x: %w", originID[:], err)
   553  	}
   554  
   555  	// check if range request is valid
   556  	if rangeRequest.ToHeight < rangeRequest.FromHeight {
   557  		e.log.Warn().
   558  			Hex("origin_id", logging.ID(originID)).
   559  			Str(logging.KeySuspicious, "true").
   560  			Str("reason", alsp.InvalidMessage.String()).
   561  			Msgf("received invalid range request from height %d is not less than the to height %d, creating ALSP report", rangeRequest.FromHeight, rangeRequest.ToHeight)
   562  		report, err := alsp.NewMisbehaviorReport(originID, alsp.InvalidMessage)
   563  		if err != nil {
   564  			// failing to create the misbehavior report is unlikely. If an error is encountered while
   565  			// creating the misbehavior report it indicates a bug and processing can not proceed.
   566  			return fmt.Errorf("failed to create misbehavior report (invalid range request) from %x: %w", originID[:], err)
   567  		}
   568  		// failed unambiguous validation check and should be reported as misbehavior
   569  		e.con.ReportMisbehavior(report)
   570  		return nil
   571  	}
   572  
   573  	// to avoid creating a misbehavior report for every range request received, use a probabilistic approach.
   574  	// The higher the range request and base probability, the higher the probability of creating a misbehavior report.
   575  
   576  	// rangeRequestProb is calculated as follows:
   577  	// rangeRequestBaseProb * ((rangeRequest.ToHeight-rangeRequest.FromHeight) + 1) / synccore.DefaultConfig().MaxSize
   578  	// Example 1 (small range) if the range request is for 10 blocks and rangeRequestBaseProb is 0.01, then the probability of
   579  	// creating a misbehavior report is:
   580  	// rangeRequestBaseProb * (10+1) / synccore.DefaultConfig().MaxSize
   581  	// = 0.01 * 11 / 64 = 0.00171875 = 0.171875%
   582  	// Example 2 (large range) if the range request is for 1000 blocks and rangeRequestBaseProb is 0.01, then the probability of
   583  	// creating a misbehavior report is:
   584  	// rangeRequestBaseProb * (1000+1) / synccore.DefaultConfig().MaxSize
   585  	// = 0.01 * 1001 / 64 = 0.15640625 = 15.640625%
   586  	rangeRequestProb := e.spamDetectionConfig.rangeRequestBaseProb * (float32(rangeRequest.ToHeight-rangeRequest.FromHeight) + 1) / float32(synccore.DefaultConfig().MaxSize)
   587  	if float32(n) < rangeRequestProb*spamProbabilityMultiplier {
   588  		// create a misbehavior report
   589  		e.log.Debug().
   590  			Hex("origin_id", logging.ID(originID)).
   591  			Str(logging.KeyLoad, "true").
   592  			Str("reason", alsp.ResourceIntensiveRequest.String()).
   593  			Msgf("from height %d to height %d, creating probabilistic ALSP report", rangeRequest.FromHeight, rangeRequest.ToHeight)
   594  		report, err := alsp.NewMisbehaviorReport(originID, alsp.ResourceIntensiveRequest)
   595  		if err != nil {
   596  			// failing to create the misbehavior report is unlikely. If an error is encountered while
   597  			// creating the misbehavior report it indicates a bug and processing can not proceed.
   598  			return fmt.Errorf("failed to create misbehavior report from %x: %w", originID[:], err)
   599  		}
   600  		// failed validation check and should be reported as misbehavior
   601  
   602  		// failed probabilistic (load) validation check and should be reported as misbehavior
   603  		e.con.ReportMisbehavior(report)
   604  		return nil
   605  	}
   606  
   607  	// passed all validation checks with no misbehavior detected
   608  	return nil
   609  }
   610  
   611  // validateSyncRequestForALSP checks if a sync request should be reported as a misbehavior and sends misbehavior report to ALSP.
   612  // The misbehavior is ambiguous to detect as malicious behavior because there is no way to know for sure if the sender is sending
   613  // a sync request maliciously or not, so we use a probabilistic approach to report the misbehavior.
   614  //
   615  // Args:
   616  // - originID: the sender of the sync request
   617  // Returns:
   618  // - error: If an error is encountered while validating the sync request. Error is assumed to be irrecoverable because of internal processes that didn't allow validation to complete.
   619  func (e *Engine) validateSyncRequestForALSP(originID flow.Identifier) error {
   620  	// Generate a random integer between 0 and spamProbabilityMultiplier (exclusive)
   621  	n, err := rand.Uint32n(spamProbabilityMultiplier)
   622  	if err != nil {
   623  		return fmt.Errorf("failed to generate random number from %x: %w", originID[:], err)
   624  	}
   625  
   626  	// to avoid creating a misbehavior report for every sync request received, use a probabilistic approach.
   627  	// Create a report with a probability of spamDetectionConfig.syncRequestProb
   628  	if float32(n) < e.spamDetectionConfig.syncRequestProb*spamProbabilityMultiplier {
   629  
   630  		// create misbehavior report
   631  		e.log.Debug().
   632  			Hex("origin_id", logging.ID(originID)).
   633  			Str(logging.KeyLoad, "true").
   634  			Str("reason", alsp.ResourceIntensiveRequest.String()).
   635  			Msg("creating probabilistic ALSP report")
   636  
   637  		report, err := alsp.NewMisbehaviorReport(originID, alsp.ResourceIntensiveRequest)
   638  		if err != nil {
   639  			// failing to create the misbehavior report is unlikely. If an error is encountered while
   640  			// creating the misbehavior report it indicates a bug and processing can not proceed.
   641  			return fmt.Errorf("failed to create misbehavior report from %x: %w", originID[:], err)
   642  		}
   643  		e.con.ReportMisbehavior(report)
   644  		return nil
   645  	}
   646  
   647  	// passed all validation checks with no misbehavior detected
   648  	return nil
   649  }
   650  
// validateSyncResponseForALSP is a placeholder for ALSP validation of sync responses.
// It currently performs no checks, ignores all of its arguments and always returns nil.
//
// TODO: implement spam reporting similar to validateSyncRequestForALSP
func (e *Engine) validateSyncResponseForALSP(channel channels.Channel, id flow.Identifier, syncResponse *messages.SyncResponse) error {
	return nil
}