github.com/onflow/flow-go@v0.33.17/network/p2p/inspector/validation/control_message_validation_inspector.go (about)

     1  package validation
     2  
     3  import (
     4  	"fmt"
     5  	"time"
     6  
     7  	"github.com/go-playground/validator/v10"
     8  	"github.com/hashicorp/go-multierror"
     9  	pubsub "github.com/libp2p/go-libp2p-pubsub"
    10  	pubsub_pb "github.com/libp2p/go-libp2p-pubsub/pb"
    11  	"github.com/libp2p/go-libp2p/core/peer"
    12  	"github.com/rs/zerolog"
    13  
    14  	"github.com/onflow/flow-go/engine/common/worker"
    15  	"github.com/onflow/flow-go/model/flow"
    16  	"github.com/onflow/flow-go/module"
    17  	"github.com/onflow/flow-go/module/component"
    18  	"github.com/onflow/flow-go/module/irrecoverable"
    19  	"github.com/onflow/flow-go/module/mempool/queue"
    20  	"github.com/onflow/flow-go/module/metrics"
    21  	"github.com/onflow/flow-go/network"
    22  	"github.com/onflow/flow-go/network/channels"
    23  	"github.com/onflow/flow-go/network/p2p"
    24  	p2pconfig "github.com/onflow/flow-go/network/p2p/config"
    25  	"github.com/onflow/flow-go/network/p2p/inspector/internal/cache"
    26  	p2plogging "github.com/onflow/flow-go/network/p2p/logging"
    27  	p2pmsg "github.com/onflow/flow-go/network/p2p/message"
    28  	"github.com/onflow/flow-go/state/protocol"
    29  	"github.com/onflow/flow-go/state/protocol/events"
    30  	"github.com/onflow/flow-go/utils/logging"
    31  	flowrand "github.com/onflow/flow-go/utils/rand"
    32  )
    33  
    34  // ControlMsgValidationInspector RPC message inspector that inspects control messages and performs some validation on them,
    35  // when some validation rule is broken feedback is given via the Peer scoring notifier.
    36  type ControlMsgValidationInspector struct {
    37  	component.Component
    38  	events.Noop
    39  	ctx     irrecoverable.SignalerContext
    40  	logger  zerolog.Logger
    41  	sporkID flow.Identifier
    42  	metrics module.GossipSubRpcValidationInspectorMetrics
    43  	// config control message validation configurations.
    44  	config *p2pconfig.RpcValidationInspector
    45  	// distributor used to disseminate invalid RPC message notifications.
    46  	distributor p2p.GossipSubInspectorNotifDistributor
    47  	// workerPool queue that stores *InspectRPCRequest that will be processed by component workers.
    48  	workerPool *worker.Pool[*InspectRPCRequest]
    49  	// tracker is a map that associates the hash of a peer's ID with the
    50  	// number of cluster-prefix topic control messages received from that peer. It helps in tracking
    51  	// and managing the rate of incoming control messages from each peer, ensuring that the system
    52  	// stays performant and resilient against potential spam or abuse.
    53  	// The counter is incremented in the following scenarios:
    54  	// 1. The cluster prefix topic is received while the inspector waits for the cluster IDs provider to be set (this can happen during the startup or epoch transitions).
    55  	// 2. The node sends a cluster prefix topic where the cluster prefix does not match any of the active cluster IDs.
    56  	// In such cases, the inspector will allow a configured number of these messages from the corresponding peer.
    57  	tracker    *cache.ClusterPrefixedMessagesReceivedTracker
    58  	idProvider module.IdentityProvider
    59  	rpcTracker p2p.RpcControlTracking
    60  	// networkingType indicates public or private network, rpc publish messages are inspected for unstaked senders when running the private network.
    61  	networkingType network.NetworkingType
    62  	// topicOracle callback used to retrieve the current subscribed topics of the libp2p node.
    63  	topicOracle func() p2p.TopicProvider
    64  }
    65  
    66  type InspectorParams struct {
    67  	// Logger the logger used by the inspector.
    68  	Logger zerolog.Logger `validate:"required"`
    69  	// SporkID the current spork ID.
    70  	SporkID flow.Identifier `validate:"required"`
    71  	// Config inspector configuration.
    72  	Config *p2pconfig.RpcValidationInspector `validate:"required"`
    73  	// Distributor gossipsub inspector notification distributor.
    74  	Distributor p2p.GossipSubInspectorNotifDistributor `validate:"required"`
    75  	// HeroCacheMetricsFactory the metrics factory.
    76  	HeroCacheMetricsFactory metrics.HeroCacheMetricsFactory `validate:"required"`
    77  	// IdProvider identity provider is used to get the flow identifier for a peer.
    78  	IdProvider module.IdentityProvider `validate:"required"`
    79  	// InspectorMetrics metrics for the validation inspector.
    80  	InspectorMetrics module.GossipSubRpcValidationInspectorMetrics `validate:"required"`
    81  	// RpcTracker tracker used to track iHave RPC's sent and last size.
    82  	RpcTracker p2p.RpcControlTracking `validate:"required"`
    83  	// NetworkingType the networking type of the node.
    84  	NetworkingType network.NetworkingType `validate:"required"`
    85  	// TopicOracle callback used to retrieve the current subscribed topics of the libp2p node.
    86  	// It is set as a callback to avoid circular dependencies between the topic oracle and the inspector.
    87  	TopicOracle func() p2p.TopicProvider `validate:"required"`
    88  }
    89  
    90  var _ component.Component = (*ControlMsgValidationInspector)(nil)
    91  var _ p2p.GossipSubMsgValidationRpcInspector = (*ControlMsgValidationInspector)(nil)
    92  var _ protocol.Consumer = (*ControlMsgValidationInspector)(nil)
    93  
    94  // NewControlMsgValidationInspector returns new ControlMsgValidationInspector
    95  // Args:
    96  //   - *InspectorParams: params used to create the inspector.
    97  //
    98  // Returns:
    99  //   - *ControlMsgValidationInspector: a new control message validation inspector.
   100  //   - error: an error if there is any error while creating the inspector. All errors are irrecoverable and unexpected.
   101  func NewControlMsgValidationInspector(params *InspectorParams) (*ControlMsgValidationInspector, error) {
   102  	err := validator.New().Struct(params)
   103  	if err != nil {
   104  		return nil, fmt.Errorf("inspector params validation failed: %w", err)
   105  	}
   106  	lg := params.Logger.With().Str("component", "gossip_sub_rpc_validation_inspector").Logger()
   107  
   108  	inspectMsgQueueCacheCollector := metrics.GossipSubRPCInspectorQueueMetricFactory(params.HeroCacheMetricsFactory, params.NetworkingType)
   109  	clusterPrefixedCacheCollector := metrics.GossipSubRPCInspectorClusterPrefixedCacheMetricFactory(params.HeroCacheMetricsFactory, params.NetworkingType)
   110  
   111  	clusterPrefixedTracker, err := cache.NewClusterPrefixedMessagesReceivedTracker(params.Logger,
   112  		params.Config.ClusterPrefixedMessage.ControlMsgsReceivedCacheSize,
   113  		clusterPrefixedCacheCollector,
   114  		params.Config.ClusterPrefixedMessage.ControlMsgsReceivedCacheDecay)
   115  	if err != nil {
   116  		return nil, fmt.Errorf("failed to create cluster prefix topics received tracker")
   117  	}
   118  
   119  	if params.Config.PublishMessages.MaxSampleSize < params.Config.PublishMessages.ErrorThreshold {
   120  		return nil, fmt.Errorf("rpc message max sample size must be greater than or equal to rpc message error threshold, got %d and %d respectively",
   121  			params.Config.PublishMessages.MaxSampleSize,
   122  			params.Config.PublishMessages.ErrorThreshold)
   123  	}
   124  
   125  	c := &ControlMsgValidationInspector{
   126  		logger:         lg,
   127  		sporkID:        params.SporkID,
   128  		config:         params.Config,
   129  		distributor:    params.Distributor,
   130  		tracker:        clusterPrefixedTracker,
   131  		rpcTracker:     params.RpcTracker,
   132  		idProvider:     params.IdProvider,
   133  		metrics:        params.InspectorMetrics,
   134  		networkingType: params.NetworkingType,
   135  		topicOracle:    params.TopicOracle,
   136  	}
   137  
   138  	store := queue.NewHeroStore(params.Config.InspectionQueue.Size, params.Logger, inspectMsgQueueCacheCollector)
   139  
   140  	pool := worker.NewWorkerPoolBuilder[*InspectRPCRequest](lg, store, c.processInspectRPCReq).Build()
   141  
   142  	c.workerPool = pool
   143  
   144  	builder := component.NewComponentManagerBuilder()
   145  	builder.AddWorker(func(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) {
   146  		c.logger.Debug().Msg("starting rpc inspector distributor")
   147  		c.ctx = ctx
   148  		c.distributor.Start(ctx)
   149  		select {
   150  		case <-ctx.Done():
   151  			c.logger.Debug().Msg("rpc inspector distributor startup aborted; context cancelled")
   152  		case <-c.distributor.Ready():
   153  			c.logger.Debug().Msg("rpc inspector distributor started")
   154  			ready()
   155  		}
   156  		<-ctx.Done()
   157  		c.logger.Debug().Msg("rpc inspector distributor stopped")
   158  		<-c.distributor.Done()
   159  		c.logger.Debug().Msg("rpc inspector distributor shutdown complete")
   160  	})
   161  	for i := 0; i < c.config.InspectionQueue.NumberOfWorkers; i++ {
   162  		builder.AddWorker(pool.WorkerLogic())
   163  	}
   164  	c.Component = builder.Build()
   165  	return c, nil
   166  }
   167  
   168  func (c *ControlMsgValidationInspector) Start(parent irrecoverable.SignalerContext) {
   169  	if c.topicOracle == nil {
   170  		parent.Throw(fmt.Errorf("control message validation inspector topic oracle not set"))
   171  	}
   172  	c.Component.Start(parent)
   173  }
   174  
   175  // Name returns the name of the rpc inspector.
   176  func (c *ControlMsgValidationInspector) Name() string {
   177  	return rpcInspectorComponentName
   178  }
   179  
   180  // ActiveClustersChanged consumes cluster ID update protocol events.
   181  func (c *ControlMsgValidationInspector) ActiveClustersChanged(clusterIDList flow.ChainIDList) {
   182  	c.tracker.StoreActiveClusterIds(clusterIDList)
   183  }
   184  
   185  // Inspect is called by gossipsub upon reception of a rpc from a remote  node.
   186  // It creates a new InspectRPCRequest for the RPC to be inspected async by the worker pool.
   187  // Args:
   188  //   - from: the sender.
   189  //   - rpc: the control message RPC.
   190  //
   191  // Returns:
   192  //   - error: if a new inspect rpc request cannot be created, all errors returned are considered irrecoverable.
   193  func (c *ControlMsgValidationInspector) Inspect(from peer.ID, rpc *pubsub.RPC) error {
   194  	// first truncate the rpc to the configured max sample size; if needed
   195  	c.truncateRPC(from, rpc)
   196  
   197  	// second, queue further async inspection
   198  	req, err := NewInspectRPCRequest(from, rpc)
   199  	if err != nil {
   200  		c.logger.Error().
   201  			Err(err).
   202  			Bool(logging.KeyNetworkingSecurity, true).
   203  			Str("peer_id", p2plogging.PeerId(from)).
   204  			Msg("failed to get inspect RPC request")
   205  		return fmt.Errorf("failed to get inspect RPC request: %w", err)
   206  	}
   207  	c.workerPool.Submit(req)
   208  
   209  	return nil
   210  }
   211  
   212  // updateMetrics updates the metrics for the received RPC.
   213  // Args:
   214  //   - from: the sender.
   215  //
   216  // - rpc: the control message RPC.
   217  func (c *ControlMsgValidationInspector) updateMetrics(from peer.ID, rpc *pubsub.RPC) {
   218  	includedMessages := len(rpc.GetPublish())
   219  	iHaveCount, iWantCount, graftCount, pruneCount := 0, 0, 0, 0
   220  	ctl := rpc.GetControl()
   221  	if ctl != nil {
   222  		iHaveCount = len(ctl.GetIhave())
   223  		iWantCount = len(ctl.GetIwant())
   224  		graftCount = len(ctl.GetGraft())
   225  		pruneCount = len(ctl.GetPrune())
   226  	}
   227  	c.metrics.OnIncomingRpcReceived(iHaveCount, iWantCount, graftCount, pruneCount, includedMessages)
   228  	if c.logger.GetLevel() > zerolog.TraceLevel {
   229  		return // skip logging if trace level is not enabled
   230  	}
   231  	c.logger.Trace().
   232  		Str("peer_id", p2plogging.PeerId(from)).
   233  		Int("iHaveCount", iHaveCount).
   234  		Int("iWantCount", iWantCount).
   235  		Int("graftCount", graftCount).
   236  		Int("pruneCount", pruneCount).
   237  		Int("included_message_count", includedMessages).
   238  		Msg("received rpc with control messages")
   239  }
   240  
   241  // processInspectRPCReq func used by component workers to perform further inspection of RPC control messages that will validate ensure all control message
   242  // types are valid in the RPC.
   243  // Args:
   244  //   - req: the inspect rpc request.
   245  //
   246  // Returns:
   247  //   - error: no error is expected to be returned from this func as they are logged and distributed in invalid control message notifications.
   248  func (c *ControlMsgValidationInspector) processInspectRPCReq(req *InspectRPCRequest) error {
   249  	c.updateMetrics(req.Peer, req.rpc)
   250  	c.metrics.AsyncProcessingStarted()
   251  	start := time.Now()
   252  	defer func() {
   253  		c.metrics.AsyncProcessingFinished(time.Since(start))
   254  	}()
   255  
   256  	activeClusterIDS := c.tracker.GetActiveClusterIds()
   257  	for _, ctrlMsgType := range p2pmsg.ControlMessageTypes() {
   258  		switch ctrlMsgType {
   259  		case p2pmsg.CtrlMsgGraft:
   260  			err, topicType := c.inspectGraftMessages(req.Peer, req.rpc.GetControl().GetGraft(), activeClusterIDS)
   261  			if err != nil {
   262  				c.logAndDistributeAsyncInspectErrs(req, p2pmsg.CtrlMsgGraft, err, 1, topicType)
   263  				return nil
   264  			}
   265  		case p2pmsg.CtrlMsgPrune:
   266  			err, topicType := c.inspectPruneMessages(req.Peer, req.rpc.GetControl().GetPrune(), activeClusterIDS)
   267  			if err != nil {
   268  				c.logAndDistributeAsyncInspectErrs(req, p2pmsg.CtrlMsgPrune, err, 1, topicType)
   269  				return nil
   270  			}
   271  		case p2pmsg.CtrlMsgIWant:
   272  			err := c.inspectIWantMessages(req.Peer, req.rpc.GetControl().GetIwant())
   273  			if err != nil {
   274  				c.logAndDistributeAsyncInspectErrs(req, p2pmsg.CtrlMsgIWant, err, 1, p2p.CtrlMsgNonClusterTopicType)
   275  				return nil
   276  			}
   277  		case p2pmsg.CtrlMsgIHave:
   278  			err, topicType := c.inspectIHaveMessages(req.Peer, req.rpc.GetControl().GetIhave(), activeClusterIDS)
   279  			if err != nil {
   280  				c.logAndDistributeAsyncInspectErrs(req, p2pmsg.CtrlMsgIHave, err, 1, topicType)
   281  				return nil
   282  			}
   283  		}
   284  	}
   285  
   286  	// inspect rpc publish messages after all control message validation has passed
   287  	err, errCount := c.inspectRpcPublishMessages(req.Peer, req.rpc.GetPublish(), activeClusterIDS)
   288  	if err != nil {
   289  		c.logAndDistributeAsyncInspectErrs(req, p2pmsg.RpcPublishMessage, err, errCount, p2p.CtrlMsgNonClusterTopicType)
   290  		return nil
   291  	}
   292  
   293  	return nil
   294  }
   295  
   296  // checkPubsubMessageSender checks the sender of the sender of pubsub message to ensure they are not unstaked, or ejected.
   297  // This check is only required on private networks.
   298  // Args:
   299  //   - message: the pubsub message.
   300  //
   301  // Returns:
   302  //   - error: if the peer ID cannot be created from bytes, sender is unknown or the identity is ejected.
   303  //
   304  // All errors returned from this function can be considered benign.
   305  func (c *ControlMsgValidationInspector) checkPubsubMessageSender(message *pubsub_pb.Message) error {
   306  	pid, err := peer.IDFromBytes(message.GetFrom())
   307  	if err != nil {
   308  		return fmt.Errorf("failed to get peer ID from bytes: %w", err)
   309  	}
   310  	if id, ok := c.idProvider.ByPeerID(pid); !ok {
   311  		return fmt.Errorf("received rpc publish message from unstaked peer: %s", pid)
   312  	} else if id.Ejected {
   313  		return fmt.Errorf("received rpc publish message from ejected peer: %s", pid)
   314  	}
   315  
   316  	return nil
   317  }
   318  
   319  // inspectGraftMessages performs topic validation on all grafts in the control message using the provided validateTopic func while tracking duplicates.
   320  // Args:
   321  // - from: peer ID of the sender.
   322  // - grafts: the list of grafts to inspect.
   323  // - activeClusterIDS: the list of active cluster ids.
   324  // Returns:
   325  // - DuplicateTopicErr: if there are any duplicate topics in the list of grafts
   326  // - error: if any error occurs while sampling or validating topics, all returned errors are benign and should not cause the node to crash.
   327  // - bool: true if an error is returned and the topic that failed validation was a cluster prefixed topic, false otherwise.
   328  func (c *ControlMsgValidationInspector) inspectGraftMessages(from peer.ID, grafts []*pubsub_pb.ControlGraft, activeClusterIDS flow.ChainIDList) (error, p2p.CtrlMsgTopicType) {
   329  	duplicateTopicTracker := make(duplicateStrTracker)
   330  	totalDuplicateTopicIds := 0
   331  	defer func() {
   332  		// regardless of inspection result, update metrics
   333  		c.metrics.OnGraftMessageInspected(totalDuplicateTopicIds)
   334  	}()
   335  	for _, graft := range grafts {
   336  		topic := channels.Topic(graft.GetTopicID())
   337  		if duplicateTopicTracker.track(topic.String()) > 1 {
   338  			// ideally, a GRAFT message should not have any duplicate topics, hence a topic ID is counted as a duplicate only if it is repeated more than once.
   339  			totalDuplicateTopicIds++
   340  			// check if the total number of duplicates exceeds the configured threshold.
   341  			if totalDuplicateTopicIds > c.config.GraftPrune.DuplicateTopicIdThreshold {
   342  				c.metrics.OnGraftDuplicateTopicIdsExceedThreshold()
   343  				return NewDuplicateTopicErr(topic.String(), totalDuplicateTopicIds, p2pmsg.CtrlMsgGraft), p2p.CtrlMsgNonClusterTopicType
   344  			}
   345  		}
   346  		err, ctrlMsgType := c.validateTopic(from, topic, activeClusterIDS)
   347  		if err != nil {
   348  			// TODO: consider adding a threshold for this error similar to the duplicate topic id threshold.
   349  			c.metrics.OnInvalidTopicIdDetectedForControlMessage(p2pmsg.CtrlMsgGraft)
   350  			return err, ctrlMsgType
   351  		}
   352  	}
   353  	return nil, p2p.CtrlMsgNonClusterTopicType
   354  }
   355  
   356  // inspectPruneMessages performs topic validation on all prunes in the control message using the provided validateTopic func while tracking duplicates.
   357  // Args:
   358  // - from: peer ID of the sender.
   359  // - prunes: the list of iHaves to inspect.
   360  // - activeClusterIDS: the list of active cluster ids.
   361  // Returns:
   362  //   - DuplicateTopicErr: if there are any duplicate topics found in the list of iHaves
   363  //     or any duplicate message ids found inside a single iHave.
   364  //   - error: if any error occurs while sampling or validating topics, all returned errors are benign and should not cause the node to crash.
   365  //   - bool: true if an error is returned and the topic that failed validation was a cluster prefixed topic, false otherwise.
   366  func (c *ControlMsgValidationInspector) inspectPruneMessages(from peer.ID, prunes []*pubsub_pb.ControlPrune, activeClusterIDS flow.ChainIDList) (error, p2p.CtrlMsgTopicType) {
   367  	tracker := make(duplicateStrTracker)
   368  	totalDuplicateTopicIds := 0
   369  	defer func() {
   370  		// regardless of inspection result, update metrics
   371  		c.metrics.OnPruneMessageInspected(totalDuplicateTopicIds)
   372  	}()
   373  	for _, prune := range prunes {
   374  		topic := channels.Topic(prune.GetTopicID())
   375  		if tracker.track(topic.String()) > 1 {
   376  			// ideally, a PRUNE message should not have any duplicate topics, hence a topic ID is counted as a duplicate only if it is repeated more than once.
   377  			totalDuplicateTopicIds++
   378  			// check if the total number of duplicates exceeds the configured threshold.
   379  			if totalDuplicateTopicIds > c.config.GraftPrune.DuplicateTopicIdThreshold {
   380  				c.metrics.OnPruneDuplicateTopicIdsExceedThreshold()
   381  				return NewDuplicateTopicErr(topic.String(), totalDuplicateTopicIds, p2pmsg.CtrlMsgPrune), p2p.CtrlMsgNonClusterTopicType
   382  			}
   383  		}
   384  		err, ctrlMsgType := c.validateTopic(from, topic, activeClusterIDS)
   385  		if err != nil {
   386  			// TODO: consider adding a threshold for this error similar to the duplicate topic id threshold.
   387  			c.metrics.OnInvalidTopicIdDetectedForControlMessage(p2pmsg.CtrlMsgPrune)
   388  			return err, ctrlMsgType
   389  		}
   390  	}
   391  	return nil, p2p.CtrlMsgNonClusterTopicType
   392  }
   393  
   394  // inspectIHaveMessages performs topic validation on all ihaves in the control message using the provided validateTopic func while tracking duplicates.
   395  // Args:
   396  // - from: peer ID of the sender.
   397  // - iHaves: the list of iHaves to inspect.
   398  // - activeClusterIDS: the list of active cluster ids.
   399  // Returns:
   400  //   - DuplicateTopicErr: if there are any duplicate topics found in the list of iHaves
   401  //     or any duplicate message ids found inside a single iHave.
   402  //   - error: if any error occurs while sampling or validating topics, all returned errors are benign and should not cause the node to crash.
   403  //   - bool: true if an error is returned and the topic that failed validation was a cluster prefixed topic, false otherwise.
   404  func (c *ControlMsgValidationInspector) inspectIHaveMessages(from peer.ID, ihaves []*pubsub_pb.ControlIHave, activeClusterIDS flow.ChainIDList) (error, p2p.CtrlMsgTopicType) {
   405  	if len(ihaves) == 0 {
   406  		return nil, p2p.CtrlMsgNonClusterTopicType
   407  	}
   408  	lg := c.logger.With().
   409  		Str("peer_id", p2plogging.PeerId(from)).
   410  		Int("sample_size", len(ihaves)).
   411  		Int("max_sample_size", c.config.IHave.MessageCountThreshold).
   412  		Logger()
   413  	duplicateTopicTracker := make(duplicateStrTracker)
   414  	duplicateMessageIDTracker := make(duplicateStrTracker)
   415  	totalMessageIds := 0
   416  	totalDuplicateTopicIds := 0
   417  	totalDuplicateMessageIds := 0
   418  	defer func() {
   419  		// regardless of inspection result, update metrics
   420  		c.metrics.OnIHaveMessagesInspected(totalDuplicateTopicIds, totalDuplicateMessageIds)
   421  	}()
   422  	for _, ihave := range ihaves {
   423  		messageIds := ihave.GetMessageIDs()
   424  		topic := ihave.GetTopicID()
   425  		totalMessageIds += len(messageIds)
   426  
   427  		// first check if the topic is valid, fail fast if it is not
   428  		err, ctrlMsgType := c.validateTopic(from, channels.Topic(topic), activeClusterIDS)
   429  		if err != nil {
   430  			// TODO: consider adding a threshold for this error similar to the duplicate topic id threshold.
   431  			c.metrics.OnInvalidTopicIdDetectedForControlMessage(p2pmsg.CtrlMsgIHave)
   432  			return err, ctrlMsgType
   433  		}
   434  
   435  		// then track the topic ensuring it is not beyond a duplicate threshold.
   436  		if duplicateTopicTracker.track(topic) > 1 {
   437  			totalDuplicateTopicIds++
   438  			// the topic is duplicated, check if the total number of duplicates exceeds the configured threshold
   439  			if totalDuplicateTopicIds > c.config.IHave.DuplicateTopicIdThreshold {
   440  				c.metrics.OnIHaveDuplicateTopicIdsExceedThreshold()
   441  				return NewDuplicateTopicErr(topic, totalDuplicateTopicIds, p2pmsg.CtrlMsgIHave), p2p.CtrlMsgNonClusterTopicType
   442  			}
   443  		}
   444  
   445  		for _, messageID := range messageIds {
   446  			if duplicateMessageIDTracker.track(messageID) > 1 {
   447  				totalDuplicateMessageIds++
   448  				// the message is duplicated, check if the total number of duplicates exceeds the configured threshold
   449  				if totalDuplicateMessageIds > c.config.IHave.DuplicateMessageIdThreshold {
   450  					c.metrics.OnIHaveDuplicateMessageIdsExceedThreshold()
   451  					return NewDuplicateMessageIDErr(messageID, totalDuplicateMessageIds, p2pmsg.CtrlMsgIHave), p2p.CtrlMsgNonClusterTopicType
   452  				}
   453  			}
   454  		}
   455  	}
   456  	lg.Debug().
   457  		Int("total_message_ids", totalMessageIds).
   458  		Int("total_duplicate_topic_ids", totalDuplicateTopicIds).
   459  		Int("total_duplicate_message_ids", totalDuplicateMessageIds).
   460  		Msg("ihave control message validation complete")
   461  	return nil, p2p.CtrlMsgNonClusterTopicType
   462  }
   463  
   464  // inspectIWantMessages inspects RPC iWant control messages. This func will sample the iWants and perform validation on each iWant in the sample.
   465  // Ensuring that the following are true:
   466  // - Each iWant corresponds to an iHave that was sent.
   467  // - Each topic in the iWant sample is a valid topic.
   468  // If the number of iWants that do not have a corresponding iHave exceed the configured threshold an error is returned.
   469  // Args:
   470  // - from: peer ID of the sender.
   471  // - iWant: the list of iWant control messages.
   472  // Returns:
   473  // - DuplicateTopicErr: if there are any duplicate message ids found in any of the iWants.
   474  // - IWantCacheMissThresholdErr: if the rate of cache misses exceeds the configured allowed threshold.
   475  func (c *ControlMsgValidationInspector) inspectIWantMessages(from peer.ID, iWants []*pubsub_pb.ControlIWant) error {
   476  	if len(iWants) == 0 {
   477  		return nil
   478  	}
   479  	lastHighest := c.rpcTracker.LastHighestIHaveRPCSize()
   480  	lg := c.logger.With().
   481  		Str("peer_id", p2plogging.PeerId(from)).
   482  		Uint("max_sample_size", c.config.IWant.MessageCountThreshold).
   483  		Int64("last_highest_ihave_rpc_size", lastHighest).
   484  		Logger()
   485  	duplicateMsgIdTracker := make(duplicateStrTracker)
   486  	cacheMisses := 0
   487  	duplicateMessageIds := 0
   488  	defer func() {
   489  		// regardless of inspection result, update metrics
   490  		c.metrics.OnIWantMessagesInspected(duplicateMessageIds, cacheMisses)
   491  	}()
   492  
   493  	lg = lg.With().
   494  		Int("iwant_msg_count", len(iWants)).
   495  		Int("cache_misses_threshold", c.config.IWant.CacheMissThreshold).
   496  		Int("duplicates_threshold", c.config.IWant.DuplicateMsgIdThreshold).Logger()
   497  
   498  	lg.Trace().Msg("validating sample of message ids from iwant control message")
   499  
   500  	totalMessageIds := 0
   501  	for _, iWant := range iWants {
   502  		messageIds := iWant.GetMessageIDs()
   503  		messageIDCount := uint(len(messageIds))
   504  		for _, messageID := range messageIds {
   505  			// check duplicate allowed threshold
   506  			if duplicateMsgIdTracker.track(messageID) > 1 {
   507  				// ideally, an iWant message should not have any duplicate message IDs, hence a message id is considered duplicate when it is repeated more than once.
   508  				duplicateMessageIds++
   509  				if duplicateMessageIds > c.config.IWant.DuplicateMsgIdThreshold {
   510  					c.metrics.OnIWantDuplicateMessageIdsExceedThreshold()
   511  					return NewIWantDuplicateMsgIDThresholdErr(duplicateMessageIds, messageIDCount, c.config.IWant.DuplicateMsgIdThreshold)
   512  				}
   513  			}
   514  			// check cache miss threshold
   515  			if !c.rpcTracker.WasIHaveRPCSent(messageID) {
   516  				cacheMisses++
   517  				if cacheMisses > c.config.IWant.CacheMissThreshold {
   518  					c.metrics.OnIWantCacheMissMessageIdsExceedThreshold()
   519  					return NewIWantCacheMissThresholdErr(cacheMisses, messageIDCount, c.config.IWant.CacheMissThreshold)
   520  				}
   521  			}
   522  			duplicateMsgIdTracker.track(messageID)
   523  			totalMessageIds++
   524  		}
   525  	}
   526  
   527  	lg.Debug().
   528  		Int("total_message_ids", totalMessageIds).
   529  		Int("cache_misses", cacheMisses).
   530  		Int("total_duplicate_message_ids", duplicateMessageIds).
   531  		Msg("iwant control message validation complete")
   532  
   533  	return nil
   534  }
   535  
   536  // inspectRpcPublishMessages inspects a sample of the RPC gossip messages and performs topic validation that ensures the following:
   537  // - Topics are known flow topics.
   538  // - Topics are valid flow topics.
   539  // - Topics are in the nodes subscribe topics list.
   540  // If more than half the topics in the sample contain invalid topics an error will be returned.
   541  // Args:
   542  // - from: peer ID of the sender.
   543  // - messages: rpc publish messages.
   544  // - activeClusterIDS: the list of active cluster ids.
   545  // Returns:
   546  // - InvalidRpcPublishMessagesErr: if the amount of invalid messages exceeds the configured RPCMessageErrorThreshold.
   547  // - int: the number of invalid pubsub messages
   548  func (c *ControlMsgValidationInspector) inspectRpcPublishMessages(from peer.ID, messages []*pubsub_pb.Message, activeClusterIDS flow.ChainIDList) (error, uint64) {
   549  	totalMessages := len(messages)
   550  	if totalMessages == 0 {
   551  		return nil, 0
   552  	}
   553  	sampleSize := c.config.PublishMessages.MaxSampleSize
   554  	if sampleSize > totalMessages {
   555  		sampleSize = totalMessages
   556  	}
   557  	c.performSample(p2pmsg.RpcPublishMessage, uint(totalMessages), uint(sampleSize), func(i, j uint) {
   558  		messages[i], messages[j] = messages[j], messages[i]
   559  	})
   560  
   561  	subscribedTopics := c.topicOracle().GetTopics()
   562  	hasSubscription := func(topic string) bool {
   563  		for _, subscribedTopic := range subscribedTopics {
   564  			if topic == subscribedTopic {
   565  				return true
   566  			}
   567  		}
   568  		return false
   569  	}
   570  	var errs *multierror.Error
   571  	invalidTopicIdsCount := 0
   572  	invalidSubscriptionsCount := 0
   573  	invalidSendersCount := 0
   574  	defer func() {
   575  		// regardless of inspection result, update metrics
   576  		errCnt := 0
   577  		if errs != nil {
   578  			errCnt = errs.Len()
   579  		}
   580  		c.metrics.OnPublishMessageInspected(errCnt, invalidTopicIdsCount, invalidSubscriptionsCount, invalidSendersCount)
   581  	}()
   582  	for _, message := range messages[:sampleSize] {
   583  		if c.networkingType == network.PrivateNetwork {
   584  			err := c.checkPubsubMessageSender(message)
   585  			if err != nil {
   586  				invalidSendersCount++
   587  				errs = multierror.Append(errs, err)
   588  				continue
   589  			}
   590  		}
   591  		topic := channels.Topic(message.GetTopic())
   592  		// The boolean value returned when validating a topic, indicating whether the topic is cluster-prefixed or not, is intentionally ignored.
   593  		// This is because we have already set a threshold for errors allowed on publish messages. Reducing the penalty further based on
   594  		// cluster prefix status is unnecessary when the error threshold is exceeded.
   595  		err, _ := c.validateTopic(from, topic, activeClusterIDS)
   596  		if err != nil {
   597  			// we can skip checking for subscription of topic that failed validation and continue
   598  			invalidTopicIdsCount++
   599  			errs = multierror.Append(errs, err)
   600  			continue
   601  		}
   602  
   603  		if !hasSubscription(topic.String()) {
   604  			invalidSubscriptionsCount++
   605  			errs = multierror.Append(errs, fmt.Errorf("subscription for topic %s not found", topic))
   606  		}
   607  	}
   608  
   609  	// return an error when we exceed the error threshold
   610  	if errs != nil && errs.Len() > c.config.PublishMessages.ErrorThreshold {
   611  		c.metrics.OnPublishMessagesInspectionErrorExceedsThreshold()
   612  		return NewInvalidRpcPublishMessagesErr(errs.ErrorOrNil(), errs.Len()), uint64(errs.Len())
   613  	}
   614  
   615  	return nil, 0
   616  }
   617  
   618  // truncateRPC truncates the RPC by truncating each control message type using the configured max sample size values.
   619  // Args:
   620  // - from: peer ID of the sender.
   621  // - rpc: the pubsub RPC.
   622  func (c *ControlMsgValidationInspector) truncateRPC(from peer.ID, rpc *pubsub.RPC) {
   623  	for _, ctlMsgType := range p2pmsg.ControlMessageTypes() {
   624  		switch ctlMsgType {
   625  		case p2pmsg.CtrlMsgGraft:
   626  			c.truncateGraftMessages(rpc)
   627  		case p2pmsg.CtrlMsgPrune:
   628  			c.truncatePruneMessages(rpc)
   629  		case p2pmsg.CtrlMsgIHave:
   630  			c.truncateIHaveMessages(rpc)
   631  		case p2pmsg.CtrlMsgIWant:
   632  			c.truncateIWantMessages(from, rpc)
   633  		default:
   634  			// sanity check this should never happen
   635  			c.logAndThrowError(fmt.Errorf("unknown control message type encountered during RPC truncation"))
   636  		}
   637  	}
   638  }
   639  
   640  // truncateGraftMessages truncates the Graft control messages in the RPC. If the total number of Grafts in the RPC exceeds the configured
   641  // GraftPruneMessageMaxSampleSize the list of Grafts will be truncated.
   642  // Args:
   643  //   - rpc: the rpc message to truncate.
   644  func (c *ControlMsgValidationInspector) truncateGraftMessages(rpc *pubsub.RPC) {
   645  	grafts := rpc.GetControl().GetGraft()
   646  	originalGraftSize := len(grafts)
   647  	if originalGraftSize <= c.config.GraftPrune.MessageCountThreshold {
   648  		return // nothing to truncate
   649  	}
   650  
   651  	// truncate grafts and update metrics
   652  	sampleSize := c.config.GraftPrune.MessageCountThreshold
   653  	c.performSample(p2pmsg.CtrlMsgGraft, uint(originalGraftSize), uint(sampleSize), func(i, j uint) {
   654  		grafts[i], grafts[j] = grafts[j], grafts[i]
   655  	})
   656  	rpc.Control.Graft = grafts[:sampleSize]
   657  	c.metrics.OnControlMessagesTruncated(p2pmsg.CtrlMsgGraft, originalGraftSize-len(rpc.Control.Graft))
   658  }
   659  
   660  // truncatePruneMessages truncates the Prune control messages in the RPC. If the total number of Prunes in the RPC exceeds the configured
   661  // GraftPruneMessageMaxSampleSize the list of Prunes will be truncated.
   662  // Args:
   663  //   - rpc: the rpc message to truncate.
   664  func (c *ControlMsgValidationInspector) truncatePruneMessages(rpc *pubsub.RPC) {
   665  	prunes := rpc.GetControl().GetPrune()
   666  	originalPruneSize := len(prunes)
   667  	if originalPruneSize <= c.config.GraftPrune.MessageCountThreshold {
   668  		return // nothing to truncate
   669  	}
   670  
   671  	sampleSize := c.config.GraftPrune.MessageCountThreshold
   672  	c.performSample(p2pmsg.CtrlMsgPrune, uint(originalPruneSize), uint(sampleSize), func(i, j uint) {
   673  		prunes[i], prunes[j] = prunes[j], prunes[i]
   674  	})
   675  	rpc.Control.Prune = prunes[:sampleSize]
   676  	c.metrics.OnControlMessagesTruncated(p2pmsg.CtrlMsgPrune, originalPruneSize-len(rpc.Control.Prune))
   677  }
   678  
   679  // truncateIHaveMessages truncates the iHaves control messages in the RPC. If the total number of iHaves in the RPC exceeds the configured
   680  // MessageCountThreshold the list of iHaves will be truncated.
   681  // Args:
   682  //   - rpc: the rpc message to truncate.
   683  func (c *ControlMsgValidationInspector) truncateIHaveMessages(rpc *pubsub.RPC) {
   684  	ihaves := rpc.GetControl().GetIhave()
   685  	originalIHaveCount := len(ihaves)
   686  	if originalIHaveCount == 0 {
   687  		return
   688  	}
   689  
   690  	if originalIHaveCount > c.config.IHave.MessageCountThreshold {
   691  		// truncate ihaves and update metrics
   692  		sampleSize := c.config.IHave.MessageCountThreshold
   693  		if sampleSize > originalIHaveCount {
   694  			sampleSize = originalIHaveCount
   695  		}
   696  		c.performSample(p2pmsg.CtrlMsgIHave, uint(originalIHaveCount), uint(sampleSize), func(i, j uint) {
   697  			ihaves[i], ihaves[j] = ihaves[j], ihaves[i]
   698  		})
   699  		rpc.Control.Ihave = ihaves[:sampleSize]
   700  		c.metrics.OnControlMessagesTruncated(p2pmsg.CtrlMsgIHave, originalIHaveCount-len(rpc.Control.Ihave))
   701  	}
   702  	c.truncateIHaveMessageIds(rpc)
   703  }
   704  
   705  // truncateIHaveMessageIds truncates the message ids for each iHave control message in the RPC. If the total number of message ids in a single iHave exceeds the configured
   706  // MessageIdCountThreshold the list of message ids will be truncated. Before message ids are truncated the iHave control messages should have been truncated themselves.
   707  // Args:
   708  //   - rpc: the rpc message to truncate.
   709  func (c *ControlMsgValidationInspector) truncateIHaveMessageIds(rpc *pubsub.RPC) {
   710  	for _, ihave := range rpc.GetControl().GetIhave() {
   711  		messageIDs := ihave.GetMessageIDs()
   712  		originalMessageIdCount := len(messageIDs)
   713  		if originalMessageIdCount == 0 {
   714  			continue // nothing to truncate; skip
   715  		}
   716  
   717  		if originalMessageIdCount > c.config.IHave.MessageIdCountThreshold {
   718  			sampleSize := c.config.IHave.MessageIdCountThreshold
   719  			if sampleSize > originalMessageIdCount {
   720  				sampleSize = originalMessageIdCount
   721  			}
   722  			c.performSample(p2pmsg.CtrlMsgIHave, uint(originalMessageIdCount), uint(sampleSize), func(i, j uint) {
   723  				messageIDs[i], messageIDs[j] = messageIDs[j], messageIDs[i]
   724  			})
   725  			ihave.MessageIDs = messageIDs[:sampleSize]
   726  			c.metrics.OnIHaveControlMessageIdsTruncated(originalMessageIdCount - len(ihave.MessageIDs))
   727  		}
   728  		c.metrics.OnIHaveMessageIDsReceived(ihave.GetTopicID(), len(ihave.MessageIDs))
   729  	}
   730  }
   731  
   732  // truncateIWantMessages truncates the iWant control messages in the RPC. If the total number of iWants in the RPC exceeds the configured
   733  // MessageCountThreshold the list of iWants will be truncated.
   734  // Args:
   735  //   - rpc: the rpc message to truncate.
   736  func (c *ControlMsgValidationInspector) truncateIWantMessages(from peer.ID, rpc *pubsub.RPC) {
   737  	iWants := rpc.GetControl().GetIwant()
   738  	originalIWantCount := uint(len(iWants))
   739  	if originalIWantCount == 0 {
   740  		return
   741  	}
   742  
   743  	if originalIWantCount > c.config.IWant.MessageCountThreshold {
   744  		// truncate iWants and update metrics
   745  		sampleSize := c.config.IWant.MessageCountThreshold
   746  		if sampleSize > originalIWantCount {
   747  			sampleSize = originalIWantCount
   748  		}
   749  		c.performSample(p2pmsg.CtrlMsgIWant, originalIWantCount, sampleSize, func(i, j uint) {
   750  			iWants[i], iWants[j] = iWants[j], iWants[i]
   751  		})
   752  		rpc.Control.Iwant = iWants[:sampleSize]
   753  		c.metrics.OnControlMessagesTruncated(p2pmsg.CtrlMsgIWant, int(originalIWantCount)-len(rpc.Control.Iwant))
   754  	}
   755  	c.truncateIWantMessageIds(from, rpc)
   756  }
   757  
   758  // truncateIWantMessageIds truncates the message ids for each iWant control message in the RPC. If the total number of message ids in a single iWant exceeds the configured
   759  // MessageIdCountThreshold the list of message ids will be truncated. Before message ids are truncated the iWant control messages should have been truncated themselves.
   760  // Args:
   761  //   - rpc: the rpc message to truncate.
   762  func (c *ControlMsgValidationInspector) truncateIWantMessageIds(from peer.ID, rpc *pubsub.RPC) {
   763  	lastHighest := c.rpcTracker.LastHighestIHaveRPCSize()
   764  	lg := c.logger.With().
   765  		Str("peer_id", p2plogging.PeerId(from)).
   766  		Uint("max_sample_size", c.config.IWant.MessageCountThreshold).
   767  		Int64("last_highest_ihave_rpc_size", lastHighest).
   768  		Logger()
   769  
   770  	sampleSize := int(10 * lastHighest)
   771  	if sampleSize == 0 || sampleSize > c.config.IWant.MessageIdCountThreshold {
   772  		// invalid or 0 sample size is suspicious
   773  		lg.Warn().Str(logging.KeySuspicious, "true").Msg("zero or invalid sample size, using default max sample size")
   774  		sampleSize = c.config.IWant.MessageIdCountThreshold
   775  	}
   776  	for _, iWant := range rpc.GetControl().GetIwant() {
   777  		messageIDs := iWant.GetMessageIDs()
   778  		totalMessageIdCount := len(messageIDs)
   779  		if totalMessageIdCount == 0 {
   780  			continue // nothing to truncate; skip
   781  		}
   782  
   783  		if totalMessageIdCount > sampleSize {
   784  			c.performSample(p2pmsg.CtrlMsgIWant, uint(totalMessageIdCount), uint(sampleSize), func(i, j uint) {
   785  				messageIDs[i], messageIDs[j] = messageIDs[j], messageIDs[i]
   786  			})
   787  			iWant.MessageIDs = messageIDs[:sampleSize]
   788  			c.metrics.OnIWantControlMessageIdsTruncated(totalMessageIdCount - len(iWant.MessageIDs))
   789  		}
   790  		c.metrics.OnIWantMessageIDsReceived(len(iWant.MessageIDs))
   791  	}
   792  }
   793  
   794  // performSample performs sampling on the specified control message that will randomize
   795  // the items in the control message slice up to index sampleSize-1. Any error encountered during sampling is considered
   796  // irrecoverable and will cause the node to crash.
   797  func (c *ControlMsgValidationInspector) performSample(ctrlMsg p2pmsg.ControlMessageType, totalSize, sampleSize uint, swap func(i, j uint)) {
   798  	err := flowrand.Samples(totalSize, sampleSize, swap)
   799  	if err != nil {
   800  		c.logAndThrowError(fmt.Errorf("failed to get random sample of %s control messages: %w", ctrlMsg, err))
   801  	}
   802  }
   803  
   804  // validateTopic ensures the topic is a valid flow topic/channel.
   805  // Expected error returns during normal operations:
   806  //   - channels.InvalidTopicErr: if topic is invalid.
   807  //   - ErrActiveClusterIdsNotSet: if the cluster ID provider is not set.
   808  //   - channels.UnknownClusterIDErr: if the topic contains a cluster ID prefix that is not in the active cluster IDs list.
   809  //
   810  // This func returns an exception in case of unexpected bug or state corruption if cluster prefixed topic validation
   811  // fails due to unexpected error returned when getting the active cluster IDS.
   812  func (c *ControlMsgValidationInspector) validateTopic(from peer.ID, topic channels.Topic, activeClusterIds flow.ChainIDList) (error, p2p.CtrlMsgTopicType) {
   813  	channel, ok := channels.ChannelFromTopic(topic)
   814  	if !ok {
   815  		return channels.NewInvalidTopicErr(topic, fmt.Errorf("failed to get channel from topic")), p2p.CtrlMsgNonClusterTopicType
   816  	}
   817  	// handle cluster prefixed topics
   818  	if channels.IsClusterChannel(channel) {
   819  		return c.validateClusterPrefixedTopic(from, topic, activeClusterIds), p2p.CtrlMsgTopicTypeClusterPrefixed
   820  	}
   821  
   822  	// non cluster prefixed topic validation
   823  	err := channels.IsValidNonClusterFlowTopic(topic, c.sporkID)
   824  	if err != nil {
   825  		return err, p2p.CtrlMsgNonClusterTopicType
   826  	}
   827  	return nil, p2p.CtrlMsgNonClusterTopicType
   828  }
   829  
   830  // validateClusterPrefixedTopic validates cluster prefixed topics.
   831  // Expected error returns during normal operations:
   832  //   - ErrActiveClusterIdsNotSet: if the cluster ID provider is not set.
   833  //   - channels.InvalidTopicErr: if topic is invalid.
   834  //   - channels.UnknownClusterIDErr: if the topic contains a cluster ID prefix that is not in the active cluster IDs list.
   835  //
   836  // In the case where an ErrActiveClusterIdsNotSet or UnknownClusterIDErr is encountered and the cluster prefixed topic received
   837  // tracker for the peer is less than or equal to the configured HardThreshold an error will only be logged and not returned.
   838  // At the point where the hard threshold is crossed the error will be returned and the sender will start to be penalized.
   839  // Any errors encountered while incrementing or loading the cluster prefixed control message gauge for a peer will result in an irrecoverable error being thrown, these
   840  // errors are unexpected and irrecoverable indicating a bug.
   841  func (c *ControlMsgValidationInspector) validateClusterPrefixedTopic(from peer.ID, topic channels.Topic, activeClusterIds flow.ChainIDList) error {
   842  	lg := c.logger.With().
   843  		Str("from", p2plogging.PeerId(from)).
   844  		Logger()
   845  
   846  	// only staked nodes are expected to participate on cluster prefixed topics
   847  	nodeID, err := c.getFlowIdentifier(from)
   848  	if err != nil {
   849  		return err
   850  	}
   851  	if len(activeClusterIds) == 0 {
   852  		// cluster IDs have not been updated yet
   853  		_, incErr := c.tracker.Inc(nodeID)
   854  		if incErr != nil {
   855  			// irrecoverable error encountered
   856  			c.logAndThrowError(fmt.Errorf("error encountered while incrementing the cluster prefixed control message gauge %s: %w", nodeID, err))
   857  		}
   858  
   859  		// if the amount of messages received is below our hard threshold log the error and return nil.
   860  		if ok := c.checkClusterPrefixHardThreshold(nodeID); ok {
   861  			lg.Warn().
   862  				Err(err).
   863  				Str("topic", topic.String()).
   864  				Msg("failed to validate cluster prefixed control message with cluster pre-fixed topic active cluster ids not set")
   865  			return nil
   866  		}
   867  
   868  		return NewActiveClusterIdsNotSetErr(topic)
   869  	}
   870  
   871  	err = channels.IsValidFlowClusterTopic(topic, activeClusterIds)
   872  	if err != nil {
   873  		if channels.IsUnknownClusterIDErr(err) {
   874  			// unknown cluster ID error could indicate that a node has fallen
   875  			// behind and needs to catchup increment to topics received cache.
   876  			_, incErr := c.tracker.Inc(nodeID)
   877  			if incErr != nil {
   878  				c.logAndThrowError(fmt.Errorf("error encountered while incrementing the cluster prefixed control message gauge %s: %w", nodeID, err))
   879  			}
   880  			// if the amount of messages received is below our hard threshold log the error and return nil.
   881  			if c.checkClusterPrefixHardThreshold(nodeID) {
   882  				lg.Warn().
   883  					Err(err).
   884  					Str("topic", topic.String()).
   885  					Msg("processing unknown cluster prefixed topic received below cluster prefixed discard threshold peer may be behind in the protocol")
   886  				return nil
   887  			}
   888  		}
   889  		return err
   890  	}
   891  
   892  	return nil
   893  }
   894  
   895  // getFlowIdentifier returns the flow identity identifier for a peer.
   896  // Args:
   897  //   - peerID: the peer id of the sender.
   898  //
   899  // The returned error indicates that the peer is un-staked.
   900  func (c *ControlMsgValidationInspector) getFlowIdentifier(peerID peer.ID) (flow.Identifier, error) {
   901  	id, ok := c.idProvider.ByPeerID(peerID)
   902  	if !ok {
   903  		return flow.ZeroID, NewUnstakedPeerErr(fmt.Errorf("failed to get flow identity for peer: %s", peerID))
   904  	}
   905  	return id.ID(), nil
   906  }
   907  
   908  // checkClusterPrefixHardThreshold returns true if the cluster prefix received tracker count is less than
   909  // the configured HardThreshold, false otherwise.
   910  // If any error is encountered while loading from the tracker this func will throw an error on the signaler context, these errors
   911  // are unexpected and irrecoverable indicating a bug.
   912  func (c *ControlMsgValidationInspector) checkClusterPrefixHardThreshold(nodeID flow.Identifier) bool {
   913  	gauge, err := c.tracker.Load(nodeID)
   914  	if err != nil {
   915  		// irrecoverable error encountered
   916  		c.logAndThrowError(fmt.Errorf("cluster prefixed control message gauge during hard threshold check failed for node %s: %w", nodeID, err))
   917  	}
   918  	return gauge <= c.config.ClusterPrefixedMessage.HardThreshold
   919  }
   920  
   921  // logAndDistributeErr logs the provided error and attempts to disseminate an invalid control message validation notification for the error.
   922  // Args:
   923  //   - req: inspect rpc request that failed validation.
   924  //   - ctlMsgType: the control message type of the rpc message that caused the error.
   925  //   - err: the error that occurred.
   926  //   - count: the number of occurrences of the error.
   927  //   - isClusterPrefixed: indicates if the errors occurred on a cluster prefixed topic.
   928  func (c *ControlMsgValidationInspector) logAndDistributeAsyncInspectErrs(req *InspectRPCRequest, ctlMsgType p2pmsg.ControlMessageType, err error, count uint64, topicType p2p.CtrlMsgTopicType) {
   929  	lg := c.logger.With().
   930  		Err(err).
   931  		Str("control_message_type", ctlMsgType.String()).
   932  		Bool(logging.KeySuspicious, true).
   933  		Bool(logging.KeyNetworkingSecurity, true).
   934  		Str("topic_type", topicType.String()).
   935  		Uint64("error_count", count).
   936  		Str("peer_id", p2plogging.PeerId(req.Peer)).
   937  		Logger()
   938  
   939  	switch {
   940  	case IsErrActiveClusterIDsNotSet(err):
   941  		c.metrics.OnActiveClusterIDsNotSetErr()
   942  		lg.Warn().Msg("active cluster ids not set")
   943  	case IsErrUnstakedPeer(err):
   944  		c.metrics.OnUnstakedPeerInspectionFailed()
   945  		lg.Warn().Msg("control message received from unstaked peer")
   946  	default:
   947  		distErr := c.distributor.Distribute(p2p.NewInvalidControlMessageNotification(req.Peer, ctlMsgType, err, count, topicType))
   948  		if distErr != nil {
   949  			lg.Error().
   950  				Err(distErr).
   951  				Msg("failed to distribute invalid control message notification")
   952  			return
   953  		}
   954  		lg.Error().Msg("rpc control message async inspection failed")
   955  		c.metrics.OnInvalidControlMessageNotificationSent()
   956  	}
   957  }
   958  
   959  // logAndThrowError logs and throws irrecoverable errors on the context.
   960  // Args:
   961  //
   962  //	err: the error encountered.
   963  func (c *ControlMsgValidationInspector) logAndThrowError(err error) {
   964  	c.logger.Error().
   965  		Err(err).
   966  		Bool(logging.KeySuspicious, true).
   967  		Bool(logging.KeyNetworkingSecurity, true).
   968  		Msg("unexpected irrecoverable error encountered")
   969  	c.ctx.Throw(err)
   970  }