github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/module/metrics.go (about)

     1  package module
     2  
     3  import (
     4  	"context"
     5  	"time"
     6  
     7  	"github.com/libp2p/go-libp2p/core/peer"
     8  	rcmgr "github.com/libp2p/go-libp2p/p2p/host/resource-manager"
     9  	httpmetrics "github.com/slok/go-http-metrics/metrics"
    10  
    11  	"github.com/onflow/flow-go/model/chainsync"
    12  	"github.com/onflow/flow-go/model/cluster"
    13  	"github.com/onflow/flow-go/model/flow"
    14  	"github.com/onflow/flow-go/network/channels"
    15  	p2pmsg "github.com/onflow/flow-go/network/p2p/message"
    16  )
    17  // EntriesFunc is a callback that returns an unsigned count when invoked (presumably a number of entries; confirm against callers).
    18  type EntriesFunc func() uint
    19  
    20  // ResolverMetrics encapsulates the metrics collectors for the DNS resolver module of the networking layer.
    21  type ResolverMetrics interface {
    22  	// DNSLookupDuration tracks the time spent resolving a DNS address.
    23  	DNSLookupDuration(duration time.Duration)
    24  
    25  	// OnDNSCacheMiss tracks the total number of DNS requests resolved by looking up the network.
    26  	OnDNSCacheMiss()
    27  
    28  	// OnDNSCacheHit tracks the total number of DNS requests resolved through the cache without
    29  	// looking up the network.
    30  	OnDNSCacheHit()
    31  
    32  	// OnDNSCacheInvalidated is called whenever the DNS cache is invalidated for an entry.
    33  	OnDNSCacheInvalidated()
    34  
    35  	// OnDNSLookupRequestDropped tracks the number of DNS lookup requests that are dropped due to a full queue.
    36  	OnDNSLookupRequestDropped()
    37  }
    38  
    39  // NetworkSecurityMetrics encapsulates the metrics collectors related to network protection.
    40  type NetworkSecurityMetrics interface {
    41  	// OnUnauthorizedMessage tracks the number of unauthorized messages seen on the network.
    42  	OnUnauthorizedMessage(role, msgType, topic, offense string)
    43  
    44  	// OnRateLimitedPeer tracks the number of rate limited unicast messages seen on the network.
    45  	OnRateLimitedPeer(pid peer.ID, role, msgType, topic, reason string)
    46  
    47  	// OnViolationReportSkipped tracks the number of slashing violations that the violations consumer did not
    48  	// report as misbehavior because the identity of the sender was not known.
    49  	OnViolationReportSkipped()
    50  }
    51  
    52  // GossipSubRpcInspectorMetrics encapsulates the metrics collectors for the GossipSub RPC Inspector module of the networking layer.
    53  // The RPC inspector is the entry point of the GossipSub protocol. It inspects the incoming RPC messages and decides
    54  // whether to accept, prune, or reject the RPC message.
    55  // The GossipSubRpcInspectorMetrics tracks the number of RPC messages received by the local node from other nodes over
    56  // the GossipSub protocol. It also tracks the number of control messages included in the RPC messages, i.e., IHAVE, IWANT,
    57  // GRAFT, PRUNE. It also tracks the number of actual messages included in the RPC messages.
    58  // The GossipSubRpcInspectorMetrics differs from LocalGossipSubRouterMetrics in that the former tracks the messages
    59  // received by the local node from other nodes over the GossipSub protocol, not all of which may be accepted by the local node,
    60  // e.g., due to RPC pruning or throttling; while the latter tracks the local node's view of the GossipSub protocol, i.e., it
    61  // contains only the messages that are accepted by the local node (either as whole RPC or only for the control messages).
    62  // Having this distinction is useful for debugging and troubleshooting the GossipSub protocol, for example, the number of
    63  // messages received by the local node from other nodes over the GossipSub protocol may be much higher than the number
    64  // of messages accepted by the local node, which may indicate that the local node is throttling the incoming messages.
    65  type GossipSubRpcInspectorMetrics interface {
    66  	// OnIWantMessageIDsReceived tracks the number of message ids received by the node from other nodes on an iWant message.
    67  	// Note: this function is called on each iWant message received by the node, not on each message id received.
    68  	OnIWantMessageIDsReceived(msgIdCount int)
    69  
    70  	// OnIHaveMessageIDsReceived tracks the number of message ids received by the node from other nodes on an iHave message.
    71  	// This function is called on each iHave message received by the node.
    72  	// Args:
    73  	// - channel: the channel on which the iHave message was received.
    74  	// - msgIdCount: the number of message ids received on the iHave message.
    75  	OnIHaveMessageIDsReceived(channel string, msgIdCount int)
    76  
    77  	// OnIncomingRpcReceived tracks the number of RPC messages received by the node.
    78  	// Args:
    79  	// - iHaveCount: the number of iHAVE messages included in the RPC.
    80  	// - iWantCount: the number of iWANT messages included in the RPC.
    81  	// - graftCount: the number of GRAFT messages included in the RPC.
    82  	// - pruneCount: the number of PRUNE messages included in the RPC.
    83  	// - msgCount: the number of publish messages included in the RPC.
    84  	OnIncomingRpcReceived(iHaveCount, iWantCount, graftCount, pruneCount, msgCount int)
    85  }
    86  
    87  // GossipSubScoringRegistryMetrics encapsulates the metrics collectors for the GossipSub scoring registry.
    88  // These collectors offer insights into penalties and
    89  // other factors used by the scoring registry to compute the application-specific score. It focuses on tracking internal
    90  // aspects of the application-specific score, distinguishing itself from GossipSubScoringMetrics.
    91  type GossipSubScoringRegistryMetrics interface {
    92  	// DuplicateMessagePenalties tracks the duplicate message penalty for a node.
    93  	DuplicateMessagePenalties(penalty float64)
    94  	// DuplicateMessagesCounts tracks the duplicate message count for a node.
    95  	DuplicateMessagesCounts(count float64)
    96  }
    97  
    98  // LocalGossipSubRouterMetrics encapsulates the metrics collectors for the GossipSub router of the local node.
    99  // It gives a lens into the local GossipSub node's view of the GossipSub protocol.
   100  // LocalGossipSubRouterMetrics differs from GossipSubRpcInspectorMetrics in that the former tracks the local node's view
   101  // of the GossipSub protocol, while the latter tracks the messages received by the local node from other nodes over the
   102  // GossipSub protocol, not all of which may be accepted by the local node, e.g., due to RPC pruning or throttling.
   103  // Having this distinction is useful for debugging and troubleshooting the GossipSub protocol, for example, the number of
   104  // messages received by the local node from other nodes over the GossipSub protocol may be much higher than the number
   105  // of messages accepted by the local node, which may indicate that the local node is throttling the incoming messages.
   106  type LocalGossipSubRouterMetrics interface {
   107  	// OnLocalMeshSizeUpdated tracks the size of the local mesh for a topic.
   108  	OnLocalMeshSizeUpdated(topic string, size int)
   109  
   110  	// OnPeerAddedToProtocol is called when the local node receives a stream from a peer on a gossipsub-related protocol.
   111  	// Args:
   112  	// 	protocol: the protocol name that the peer is connected to.
   113  	OnPeerAddedToProtocol(protocol string)
   114  
   115  	// OnPeerRemovedFromProtocol is called when the local node considers a remote peer blacklisted or unavailable.
   116  	OnPeerRemovedFromProtocol()
   117  
   118  	// OnLocalPeerJoinedTopic is called when the local node subscribes to a gossipsub topic.
   119  	OnLocalPeerJoinedTopic()
   120  
   121  	// OnLocalPeerLeftTopic is called when the local node unsubscribes from a gossipsub topic.
   122  	OnLocalPeerLeftTopic()
   123  
   124  	// OnPeerGraftTopic is called when the local node receives a GRAFT message from a remote peer on a topic.
   125  	// Note: the received GRAFT at this point is considered to have passed the RPC inspection, and is accepted by the local node.
   126  	OnPeerGraftTopic(topic string)
   127  
   128  	// OnPeerPruneTopic is called when the local node receives a PRUNE message from a remote peer on a topic.
   129  	// Note: the received PRUNE at this point is considered to have passed the RPC inspection, and is accepted by the local node.
   130  	OnPeerPruneTopic(topic string)
   131  
   132  	// OnMessageEnteredValidation is called when a received pubsub message enters the validation pipeline. It is the
   133  	// internal validation pipeline of the GossipSub protocol. The message may be rejected or accepted by the validation
   134  	// pipeline.
   135  	OnMessageEnteredValidation(size int)
   136  
   137  	// OnMessageRejected is called when a received pubsub message is rejected by the validation pipeline.
   138  	// Args:
   139  	//
   140  	//	size: the size of the message in bytes.
   141  	//	reason: the reason for rejection.
   142  	OnMessageRejected(size int, reason string)
   143  
   144  	// OnMessageDuplicate is called when a received pubsub message is a duplicate of a previously received message, and
   145  	// is dropped.
   146  	// Args:
   147  	// 	size: the size of the message in bytes.
   148  	OnMessageDuplicate(size int)
   149  
   150  	// OnPeerThrottled is called when a peer is throttled by the local node, i.e., the local node is not accepting any
   151  	// pubsub message from the peer but may still accept control messages.
   152  	OnPeerThrottled()
   153  
   154  	// OnRpcReceived is called when an RPC message is received by the local node. The received RPC is considered
   155  	// to have passed the RPC inspection, and is accepted by the local node.
   156  	// Args:
   157  	// 	msgCount: the number of messages included in the RPC.
   158  	// 	iHaveCount: the number of iHAVE messages included in the RPC.
   159  	// 	iWantCount: the number of iWANT messages included in the RPC.
   160  	// 	graftCount: the number of GRAFT messages included in the RPC.
   161  	// 	pruneCount: the number of PRUNE messages included in the RPC.
   162  	OnRpcReceived(msgCount int, iHaveCount int, iWantCount int, graftCount int, pruneCount int)
   163  
   164  	// OnRpcSent is called when an RPC message is sent by the local node.
   165  	// Note: the sent RPC is considered passed the RPC inspection, and is accepted by the local node.
   166  	// Args:
   167  	// 	msgCount: the number of messages included in the RPC.
   168  	// 	iHaveCount: the number of iHAVE messages included in the RPC.
   169  	// 	iWantCount: the number of iWANT messages included in the RPC.
   170  	// 	graftCount: the number of GRAFT messages included in the RPC.
   171  	// 	pruneCount: the number of PRUNE messages included in the RPC.
   172  	OnRpcSent(msgCount int, iHaveCount int, iWantCount int, graftCount int, pruneCount int)
   173  
   174  	// OnOutboundRpcDropped is called when an outbound RPC message is dropped by the local node, typically because the local node's
   175  	// outbound message queue is full; or the RPC is big and the local node cannot fragment it.
   176  	OnOutboundRpcDropped()
   177  
   178  	// OnUndeliveredMessage is called when a message is not delivered to at least one subscriber of the topic, for example when
   179  	// the subscriber is too slow to process the message.
   180  	OnUndeliveredMessage()
   181  
   182  	// OnMessageDeliveredToAllSubscribers is called when a message is delivered to all subscribers of the topic.
   183  	OnMessageDeliveredToAllSubscribers(size int)
   184  }
   185  
   186  // UnicastManagerMetrics encapsulates the metrics collectors for the unicast manager.
   187  type UnicastManagerMetrics interface {
   188  	// OnStreamCreated tracks the overall time it takes to create a stream successfully and the number of retry attempts.
   189  	OnStreamCreated(duration time.Duration, attempts int)
   190  	// OnStreamCreationFailure tracks the amount of time taken and number of retry attempts used when the unicast manager fails to create a stream.
   191  	OnStreamCreationFailure(duration time.Duration, attempts int)
   192  	// OnPeerDialed tracks the time it takes to dial a peer during stream creation and the number of retry attempts before a peer
   193  	// is dialed successfully.
   194  	OnPeerDialed(duration time.Duration, attempts int)
   195  	// OnPeerDialFailure tracks the amount of time taken and number of retry attempts used when the unicast manager cannot dial a peer
   196  	// to establish the initial connection between the two.
   197  	OnPeerDialFailure(duration time.Duration, attempts int)
   198  	// OnStreamEstablished tracks the time it takes to create a stream successfully on the available open connection during stream
   199  	// creation and the number of retry attempts.
   200  	OnStreamEstablished(duration time.Duration, attempts int)
   201  	// OnEstablishStreamFailure tracks the amount of time taken and number of retry attempts used when the unicast manager cannot establish
   202  	// a stream on the open connection between two peers.
   203  	OnEstablishStreamFailure(duration time.Duration, attempts int)
   204  
   205  	// OnDialRetryBudgetUpdated tracks the history of the dial retry budget updates.
   206  	OnDialRetryBudgetUpdated(budget uint64)
   207  
   208  	// OnStreamCreationRetryBudgetUpdated tracks the history of the stream creation retry budget updates.
   209  	OnStreamCreationRetryBudgetUpdated(budget uint64)
   210  
   211  	// OnDialRetryBudgetResetToDefault tracks the number of times the dial retry budget is reset to default.
   212  	OnDialRetryBudgetResetToDefault()
   213  
   214  	// OnStreamCreationRetryBudgetResetToDefault tracks the number of times the stream creation retry budget is reset to default.
   215  	OnStreamCreationRetryBudgetResetToDefault()
   216  }
   217  // GossipSubMetrics aggregates the GossipSub scoring, RPC inspector, local router and RPC validation inspector metrics interfaces.
   218  type GossipSubMetrics interface {
   219  	GossipSubScoringMetrics
   220  	GossipSubRpcInspectorMetrics // also embedded by GossipSubRpcValidationInspectorMetrics below; the method sets are identical.
   221  	LocalGossipSubRouterMetrics
   222  	GossipSubRpcValidationInspectorMetrics
   223  }
   224  // LibP2PMetrics aggregates the GossipSub, DNS resolver, DHT, resource manager, connection, unicast manager and scoring registry metrics interfaces.
   225  type LibP2PMetrics interface {
   226  	GossipSubMetrics
   227  	ResolverMetrics
   228  	DHTMetrics
   229  	rcmgr.MetricsReporter // metrics reporter interface of the libp2p resource manager package.
   230  	LibP2PConnectionMetrics
   231  	UnicastManagerMetrics
   232  	GossipSubScoringRegistryMetrics
   233  }
   234  
   235  // GossipSubScoringMetrics encapsulates the metrics collectors for the peer scoring module of the GossipSub protocol.
   236  // It tracks the scores of the peers in the local mesh and the different factors that contribute to the score of a peer.
   237  // It also tracks the scores of the topics in the local mesh and the different factors that contribute to the score of a topic.
   238  type GossipSubScoringMetrics interface {
   239  	// OnOverallPeerScoreUpdated tracks the overall score of peers in the local mesh.
   240  	OnOverallPeerScoreUpdated(float64)
   241  	// OnAppSpecificScoreUpdated tracks the application-specific score of peers in the local mesh.
   242  	OnAppSpecificScoreUpdated(float64)
   243  	// OnIPColocationFactorUpdated tracks the IP colocation factor of peers in the local mesh.
   244  	OnIPColocationFactorUpdated(float64)
   245  	// OnBehaviourPenaltyUpdated tracks the behaviour penalty of peers in the local mesh.
   246  	OnBehaviourPenaltyUpdated(float64)
   247  	// OnTimeInMeshUpdated tracks the time-in-mesh factor of peers in the local mesh for a given topic.
   248  	OnTimeInMeshUpdated(channels.Topic, time.Duration)
   249  	// OnFirstMessageDeliveredUpdated tracks the first-message-delivered factor of peers in the local mesh for a given topic.
   250  	OnFirstMessageDeliveredUpdated(channels.Topic, float64)
   251  	// OnMeshMessageDeliveredUpdated tracks the mesh-message-delivered factor of peers in the local mesh for a given topic.
   252  	OnMeshMessageDeliveredUpdated(channels.Topic, float64)
   253  	// OnInvalidMessageDeliveredUpdated tracks the invalid-message-delivered factor of peers in the local mesh for a given topic.
   254  	OnInvalidMessageDeliveredUpdated(channels.Topic, float64)
   255  	// SetWarningStateCount tracks the warning score state of peers in the local mesh. It updates the total number of
   256  	// peers in the local mesh that are in the warning state based on their score.
   257  	SetWarningStateCount(uint)
   258  }
   259  
   260  // GossipSubRpcValidationInspectorMetrics encapsulates the metrics collectors for the gossipsub rpc validation control message inspectors.
   261  type GossipSubRpcValidationInspectorMetrics interface {
   262  	GossipSubRpcInspectorMetrics
   263  
   264  	// AsyncProcessingStarted increments the metric tracking the number of inspect message requests being processed by workers in the rpc validator worker pool.
   265  	AsyncProcessingStarted()
   266  	// AsyncProcessingFinished tracks the time spent by a rpc validation inspector worker to process an inspect message request asynchronously and decrements the metric tracking
   267  	// the number of inspect message requests being processed asynchronously by the rpc validation inspector workers.
   268  	AsyncProcessingFinished(duration time.Duration)
   269  
   270  	// OnIHaveControlMessageIdsTruncated tracks the number of times message ids on an iHave message were truncated.
   271  	// Note that this function is called only when the message ids are truncated from an iHave message, not when the iHave message itself is truncated.
   272  	// This is different from the OnControlMessagesTruncated function, which is called when a slice of control messages is truncated from an RPC with all their message ids.
   273  	// Args:
   274  	//
   275  	//	diff: the number of actual messages truncated.
   276  	OnIHaveControlMessageIdsTruncated(diff int)
   277  
   278  	// OnIWantControlMessageIdsTruncated tracks the number of times message ids on an iWant message were truncated.
   279  	// Note that this function is called only when the message ids are truncated from an iWant message, not when the iWant message itself is truncated.
   280  	// This is different from the OnControlMessagesTruncated function, which is called when a slice of control messages is truncated from an RPC with all their message ids.
   281  	// Args:
   282  	// 	diff: the number of actual messages truncated.
   283  	OnIWantControlMessageIdsTruncated(diff int)
   284  
   285  	// OnControlMessagesTruncated tracks the number of times a slice of control messages is truncated from an RPC with all their included message ids.
   286  	// Args:
   287  	//
   288  	//	messageType: the type of the control message that was truncated.
   289  	//	diff: the number of control messages truncated.
   290  	OnControlMessagesTruncated(messageType p2pmsg.ControlMessageType, diff int)
   291  
   292  	// OnIWantMessagesInspected tracks the number of duplicate and cache miss message ids received by the node on iWant messages at the end of the async inspection of the iWants
   293  	// across one RPC, regardless of the result of the inspection.
   294  	// Args:
   295  	//	duplicateCount: the total number of duplicate message ids received by the node on the iWant messages at the end of the async inspection of the RPC.
   296  	//	cacheMissCount: the total number of cache miss message ids received by the node on the iWant message at the end of the async inspection of the RPC.
   297  	OnIWantMessagesInspected(duplicateCount int, cacheMissCount int)
   298  
   299  	// OnIWantDuplicateMessageIdsExceedThreshold tracks the number of times that async inspection of iWant messages failed due to the total number of duplicate message ids
   300  	// received by the node on the iWant messages of a single RPC exceeding the threshold, which results in a misbehaviour report.
   301  	OnIWantDuplicateMessageIdsExceedThreshold()
   302  
   303  	// OnIWantCacheMissMessageIdsExceedThreshold tracks the number of times that async inspection of iWant messages failed due to the total
   304  	// number of cache miss message ids received by the node on the iWant messages of a single RPC exceeding the threshold, which results in a misbehaviour report.
   305  	OnIWantCacheMissMessageIdsExceedThreshold()
   306  
   307  	// OnIHaveMessagesInspected is called at the end of the async inspection of iHave messages of a single RPC, regardless of the result of the inspection.
   308  	// It tracks the number of duplicate topic ids, duplicate message ids and invalid topic ids received by the node on the iHave messages of that single RPC at the end of the async inspection of the iHaves.
   309  	// Args:
   310  	//
   311  	//	duplicateTopicIds: the total number of duplicate topic ids received by the node on the iHave messages at the end of the async inspection of the RPC.
   312  	//	duplicateMessageIds: the number of duplicate message ids received by the node on the iHave messages at the end of the async inspection of the RPC.
   313  	//	invalidTopicIds: the number of invalid topic ids received by the node on the iHave messages at the end of the async inspection of the RPC.
   314  	OnIHaveMessagesInspected(duplicateTopicIds int, duplicateMessageIds, invalidTopicIds int)
   315  
   316  	// OnIHaveDuplicateTopicIdsExceedThreshold tracks the number of times that the async inspection of iHave messages of a single RPC failed due to the total number of duplicate topic ids
   317  	// received by the node on the iHave messages of that RPC exceeding the threshold, which results in a misbehaviour report.
   318  	OnIHaveDuplicateTopicIdsExceedThreshold()
   319  
   320  	// OnIHaveInvalidTopicIdsExceedThreshold tracks the number of times that the async inspection of iHave messages of a single RPC failed due to the total number of invalid topic ids
   321  	// received by the node on the iHave messages of that RPC exceeding the threshold, which results in a misbehaviour report.
   322  	OnIHaveInvalidTopicIdsExceedThreshold()
   323  
   324  	// OnIHaveDuplicateMessageIdsExceedThreshold tracks the number of times that the async inspection of iHave messages of a single RPC failed due to the total number of duplicate message ids
   325  	// received by the node on an iHave message exceeding the threshold, which results in a misbehaviour report.
   326  	OnIHaveDuplicateMessageIdsExceedThreshold()
   327  
   328  	// OnInvalidTopicIdDetectedForControlMessage tracks the number of times that the async inspection of a control message type on a single RPC failed due to an invalid topic id.
   329  	// Args:
   330  	// - messageType: the type of the control message on which the invalid topic id was detected.
   331  	OnInvalidTopicIdDetectedForControlMessage(messageType p2pmsg.ControlMessageType)
   332  
   333  	// OnActiveClusterIDsNotSetErr tracks the number of times that the async inspection of a control message type on a single RPC failed because the active cluster ids were not set.
   334  	// This does not cause a misbehaviour report.
   335  	OnActiveClusterIDsNotSetErr()
   336  
   337  	// OnUnstakedPeerInspectionFailed tracks the number of times that the async inspection of a control message type on a single RPC failed due to unstaked peer inspection failure.
   338  	// This does not cause a misbehaviour report.
   339  	OnUnstakedPeerInspectionFailed()
   340  
   341  	// OnInvalidControlMessageNotificationSent tracks the number of times that the async inspection of a control message failed and resulted in an invalid control message notification being sent.
   342  	OnInvalidControlMessageNotificationSent()
   343  
   344  	// OnRpcRejectedFromUnknownSender tracks the number of RPCs rejected from unstaked nodes.
   345  	OnRpcRejectedFromUnknownSender()
   346  
   347  	// OnPublishMessagesInspectionErrorExceedsThreshold tracks the number of times that async inspection of publish messages failed due to the number of errors.
   348  	OnPublishMessagesInspectionErrorExceedsThreshold()
   349  
   350  	// OnPruneDuplicateTopicIdsExceedThreshold tracks the number of times that the async inspection of prune messages for an RPC failed due to the number of duplicate topic ids
   351  	// received by the node on prune messages of the same RPC exceeding the threshold, which results in a misbehaviour report.
   352  	OnPruneDuplicateTopicIdsExceedThreshold()
   353  
   354  	// OnPruneInvalidTopicIdsExceedThreshold tracks the number of times that the async inspection of prune messages for an RPC failed due to the number of invalid topic ids
   355  	// received by the node on prune messages of the same RPC exceeding the threshold, which results in a misbehaviour report.
   356  	OnPruneInvalidTopicIdsExceedThreshold()
   357  
   358  	// OnPruneMessageInspected is called at the end of the async inspection of prune messages of the RPC, regardless of the result of the inspection.
   359  	// Args:
   360  	// 	duplicateTopicIds: the number of duplicate topic ids received by the node on the prune messages of the RPC at the end of the async inspection of the prunes.
   361  	// 	invalidTopicIds: the number of invalid topic ids received by the node on the prune messages at the end of the async inspection of a single RPC.
   362  	OnPruneMessageInspected(duplicateTopicIds, invalidTopicIds int)
   363  
   364  	// OnGraftDuplicateTopicIdsExceedThreshold tracks the number of times that the async inspection of the graft messages of a single RPC failed due to the number of duplicate topic ids
   365  	// received by the node on graft messages of the same RPC exceeding the threshold, which results in a misbehaviour report.
   366  	OnGraftDuplicateTopicIdsExceedThreshold()
   367  
   368  	// OnGraftInvalidTopicIdsExceedThreshold tracks the number of times that the async inspection of the graft messages of a single RPC failed due to the number of invalid topic ids
   369  	// received by the node on graft messages of the same RPC exceeding the threshold, which results in a misbehaviour report.
   370  	OnGraftInvalidTopicIdsExceedThreshold()
   371  
   372  	// OnGraftMessageInspected is called at the end of the async inspection of graft messages of a single RPC, regardless of the result of the inspection.
   373  	// Args:
   374  	// 	duplicateTopicIds: the number of duplicate topic ids received by the node on the graft messages at the end of the async inspection of a single RPC.
   375  	// 	invalidTopicIds: the number of invalid topic ids received by the node on the graft messages at the end of the async inspection of a single RPC.
   376  	OnGraftMessageInspected(duplicateTopicIds, invalidTopicIds int)
   377  
   378  	// OnPublishMessageInspected is called at the end of the async inspection of publish messages of a single RPC, regardless of the result of the inspection.
   379  	// It tracks the total number of errors detected during the async inspection of the rpc together with their individual breakdown.
   380  	// Args:
   381  	// - totalErrCount: the number of errors that occurred during the async inspection of publish messages.
   382  	// - invalidTopicIdsCount: the number of times that an invalid topic id was detected during the async inspection of publish messages.
   383  	// - invalidSubscriptionsCount: the number of times that an invalid subscription was detected during the async inspection of publish messages.
   384  	// - invalidSendersCount: the number of times that an invalid sender was detected during the async inspection of publish messages.
   385  	OnPublishMessageInspected(totalErrCount int, invalidTopicIdsCount int, invalidSubscriptionsCount int, invalidSendersCount int)
   386  }
   387  
   388  // NetworkInboundQueueMetrics encapsulates the metrics collectors for the inbound queue of the networking layer.
   389  type NetworkInboundQueueMetrics interface {
   390  
   391  	// MessageAdded increments the metric tracking the number of messages in the queue with the given priority.
   392  	MessageAdded(priority int)
   393  
   394  	// MessageRemoved decrements the metric tracking the number of messages in the queue with the given priority.
   395  	MessageRemoved(priority int)
   396  
   397  	// QueueDuration tracks the time spent in the queue by a message with the given priority.
   398  	QueueDuration(duration time.Duration, priority int)
   399  }
   400  
   401  // NetworkCoreMetrics encapsulates the metrics collectors for the core networking layer functionality.
   402  type NetworkCoreMetrics interface {
   403  	NetworkInboundQueueMetrics // inbound queue metrics.
   404  	AlspMetrics // Application Layer Spam Prevention metrics.
   405  	NetworkSecurityMetrics // network protection metrics.
   406  
   407  	// OutboundMessageSent collects metrics related to a message sent by the node.
   408  	OutboundMessageSent(sizeBytes int, topic string, protocol string, messageType string)
   409  	// InboundMessageReceived collects metrics related to a message received by the node.
   410  	InboundMessageReceived(sizeBytes int, topic string, protocol string, messageType string)
   411  	// DuplicateInboundMessagesDropped increments the metric tracking the number of duplicate messages dropped by the node.
   412  	DuplicateInboundMessagesDropped(topic string, protocol string, messageType string)
   413  	// UnicastMessageSendingStarted increments the metric tracking the number of unicast messages sent by the node.
   414  	UnicastMessageSendingStarted(topic string)
   415  	// UnicastMessageSendingCompleted decrements the metric tracking the number of unicast messages sent by the node.
   416  	UnicastMessageSendingCompleted(topic string)
   417  	// MessageProcessingStarted increments the metric tracking the number of messages being processed by the node.
   418  	MessageProcessingStarted(topic string)
   419  	// MessageProcessingFinished tracks the time spent by the node to process a message and decrements the metric tracking
   420  	// the number of messages being processed by the node.
   421  	MessageProcessingFinished(topic string, duration time.Duration)
   422  }
   423  
   424  // LibP2PConnectionMetrics encapsulates the metrics collectors for the connection manager of the libp2p node.
   425  type LibP2PConnectionMetrics interface {
   426  	// OutboundConnections updates the metric tracking the number of outbound connections of this node.
   427  	OutboundConnections(connectionCount uint)
   428  
   429  	// InboundConnections updates the metric tracking the number of inbound connections of this node.
   430  	InboundConnections(connectionCount uint)
   431  }
   432  
   433  // AlspMetrics encapsulates the metrics collectors for the Application Layer Spam Prevention (ALSP) module, which
   434  // is part of the networking layer. ALSP is responsible for preventing spam attacks by application layer messages that
   435  // appear to be valid to the networking layer but carry a malicious intent on the application layer (i.e., Flow protocols).
   436  type AlspMetrics interface {
   437  	// OnMisbehaviorReported is called when a misbehavior is reported by the application layer to ALSP.
   438  	// An engine detecting a spamming-related misbehavior reports it to the ALSP module.
   439  	// Args:
   440  	// - channel: the channel on which the misbehavior was reported.
   441  	// - misbehaviorType: the type of misbehavior reported.
   442  	OnMisbehaviorReported(channel string, misbehaviorType string)
   443  }
   444  
   445  // NetworkMetrics is the blanket abstraction that encapsulates the metrics collectors for the networking layer; it combines LibP2PMetrics and NetworkCoreMetrics.
   446  type NetworkMetrics interface {
   447  	LibP2PMetrics
   448  	NetworkCoreMetrics
   449  }
   450  
   451  // EngineMetrics is a generic metrics consumer for node-internal data processing
   452  // components (aka engines). Implementations must be non-blocking and concurrency safe.
   453  type EngineMetrics interface {
   454  	// MessageSent reports that the engine transmitted the message over the network.
   455  	// Unicasts, broadcasts, and multicasts are all reported once.
   456  	MessageSent(engine string, message string)
   457  	// MessageReceived reports that the engine received the message over the network.
   458  	MessageReceived(engine string, message string)
   459  	// MessageHandled reports that the engine has finished processing the message.
   460  	// Both invalid and valid messages should be reported.
   461  	// A message must be reported as either handled or dropped, not both.
   462  	MessageHandled(engine string, messages string)
   463  	// InboundMessageDropped reports that the engine has dropped an inbound message without processing it.
   464  	// Inbound messages must be reported as either handled or dropped, not both.
   465  	InboundMessageDropped(engine string, messages string)
   466  	// OutboundMessageDropped reports that the engine has dropped an outbound message without processing it.
   467  	// Outbound messages must be reported as either sent or dropped, not both.
   468  	OutboundMessageDropped(engine string, messages string)
   469  }
   470  
   471  type ComplianceMetrics interface {
   472  	FinalizedHeight(height uint64)
   473  	EpochTransitionHeight(height uint64)
   474  	SealedHeight(height uint64)
   475  	BlockFinalized(*flow.Block)
   476  	BlockSealed(*flow.Block)
   477  	CurrentEpochCounter(counter uint64)
   478  	CurrentEpochPhase(phase flow.EpochPhase)
   479  	CurrentEpochFinalView(view uint64)
   480  	CurrentDKGPhase1FinalView(view uint64)
   481  	CurrentDKGPhase2FinalView(view uint64)
   482  	CurrentDKGPhase3FinalView(view uint64)
   483  	EpochEmergencyFallbackTriggered()
   484  }
   485  
   486  type CleanerMetrics interface {
   487  	RanGC(took time.Duration)
   488  }
   489  
   490  type CacheMetrics interface {
   491  	// CacheEntries report the total number of cached items
   492  	CacheEntries(resource string, entries uint)
   493  	// CacheHit report the number of times the queried item is found in the cache
   494  	CacheHit(resource string)
   495  	// CacheNotFound records the number of times the queried item was not found in either cache or database.
   496  	CacheNotFound(resource string)
   497  	// CacheMiss report the number of times the queried item is not found in the cache, but found in the database.
   498  	CacheMiss(resource string)
   499  }
   500  
   501  type MempoolMetrics interface {
   502  	MempoolEntries(resource string, entries uint)
   503  	Register(resource string, entriesFunc EntriesFunc) error
   504  }
   505  
   506  type HotstuffMetrics interface {
   507  	// HotStuffBusyDuration reports Metrics C6 HotStuff Busy Duration
   508  	HotStuffBusyDuration(duration time.Duration, event string)
   509  
   510  	// HotStuffIdleDuration reports Metrics C6 HotStuff Idle Duration
   511  	HotStuffIdleDuration(duration time.Duration)
   512  
   513  	// HotStuffWaitDuration reports Metrics C6 HotStuff Idle Duration - the time between receiving and
   514  	// enqueueing a message to beginning to process that message.
   515  	HotStuffWaitDuration(duration time.Duration, event string)
   516  
   517  	// SetCurView reports Metrics C8: Current View maintained by Pacemaker.
   518  	SetCurView(view uint64)
   519  
   520  	// SetQCView reports Metrics C9: View of the newest QC known to Pacemaker.
   521  	SetQCView(view uint64)
   522  
   523  	// SetTCView reports last TC known to Pacemaker.
   524  	SetTCView(view uint64)
   525  
   526  	// CountSkipped counts the number of skips we did.
   527  	CountSkipped()
   528  
   529  	// CountTimeout tracks the number of views that this replica left due to observing a TC.
   530  	CountTimeout()
   531  
   532  	// SetTimeout sets the current timeout duration
   533  	SetTimeout(duration time.Duration)
   534  
   535  	// BlockProcessingDuration measures the time which the compliance engine
   536  	// spends to process one block proposal.
   537  	BlockProcessingDuration(duration time.Duration)
   538  
   539  	// VoteProcessingDuration measures the time which the hotstuff.VoteAggregator
   540  	// spends to process one vote.
   541  	VoteProcessingDuration(duration time.Duration)
   542  
   543  	// TimeoutObjectProcessingDuration measures the time which the hotstuff.TimeoutAggregator
   544  	// spends to process one timeout object.
   545  	TimeoutObjectProcessingDuration(duration time.Duration)
   546  
   547  	// CommitteeProcessingDuration measures the time which the HotStuff's core logic
   548  	// spends in the hotstuff.Replicas component, i.e. the time determining consensus
   549  	// committee relations.
   550  	CommitteeProcessingDuration(duration time.Duration)
   551  
   552  	// SignerProcessingDuration measures the time which the HotStuff's core logic
   553  	// spends in the hotstuff.Signer component, i.e. the with crypto-related operations.
   554  	SignerProcessingDuration(duration time.Duration)
   555  
   556  	// ValidatorProcessingDuration measures the time which the HotStuff's core logic
   557  	// spends in the hotstuff.Validator component, i.e. the with verifying
   558  	// consensus messages.
   559  	ValidatorProcessingDuration(duration time.Duration)
   560  
   561  	// PayloadProductionDuration measures the time which the HotStuff's core logic
   562  	// spends in the module.Builder component, i.e. the with generating block payloads.
   563  	PayloadProductionDuration(duration time.Duration)
   564  
   565  	// TimeoutCollectorsRange collects information from the node's `TimeoutAggregator` component.
   566  	// Specifically, it measurers the number of views for which we are currently collecting timeouts
   567  	// (i.e. the number of `TimeoutCollector` instances we are maintaining) and their lowest/highest view.
   568  	TimeoutCollectorsRange(lowestRetainedView uint64, newestViewCreatedCollector uint64, activeCollectors int)
   569  }
   570  
   571  type CruiseCtlMetrics interface {
   572  
   573  	// PIDError measures the current error values for the proportional, integration,
   574  	// and derivative terms of the PID controller.
   575  	PIDError(p, i, d float64)
   576  
   577  	// TargetProposalDuration measures the current value of the Block Time Controller output:
   578  	// the target duration from parent to child proposal.
   579  	TargetProposalDuration(duration time.Duration)
   580  
   581  	// ControllerOutput measures the output of the cruise control PID controller.
   582  	// Concretely, this is the quantity to subtract from the baseline view duration.
   583  	ControllerOutput(duration time.Duration)
   584  
   585  	// ProposalPublicationDelay measures the effective delay the controller imposes on publishing
   586  	// the node's own proposals, with all limits of authority applied.
   587  	// Note: Technically, our metrics capture the publication delay relative to when the publication delay was
   588  	// last requested. Currently, only the EventHandler requests a publication delay, exactly once per proposal.
   589  	ProposalPublicationDelay(duration time.Duration)
   590  }
   591  
   592  type CollectionMetrics interface {
   593  	// TransactionIngested is called when a new transaction is ingested by the
   594  	// node. It increments the total count of ingested transactions and starts
   595  	// a tx->col span for the transaction.
   596  	TransactionIngested(txID flow.Identifier)
   597  
   598  	// ClusterBlockProposed is called when a new collection is proposed by us or
   599  	// any other node in the cluster.
   600  	ClusterBlockProposed(block *cluster.Block)
   601  
   602  	// ClusterBlockFinalized is called when a collection is finalized.
   603  	ClusterBlockFinalized(block *cluster.Block)
   604  }
   605  
   606  type ConsensusMetrics interface {
   607  	// StartCollectionToFinalized reports Metrics C1: Collection Received by CCL→ Collection Included in Finalized Block
   608  	StartCollectionToFinalized(collectionID flow.Identifier)
   609  
   610  	// FinishCollectionToFinalized reports Metrics C1: Collection Received by CCL→ Collection Included in Finalized Block
   611  	FinishCollectionToFinalized(collectionID flow.Identifier)
   612  
   613  	// StartBlockToSeal reports Metrics C4: Block Received by CCL → Block Seal in finalized block
   614  	StartBlockToSeal(blockID flow.Identifier)
   615  
   616  	// FinishBlockToSeal reports Metrics C4: Block Received by CCL → Block Seal in finalized block
   617  	FinishBlockToSeal(blockID flow.Identifier)
   618  
   619  	// EmergencySeal increments the number of seals that were created in emergency mode
   620  	EmergencySeal()
   621  
   622  	// OnReceiptProcessingDuration records the number of seconds spent processing a receipt
   623  	OnReceiptProcessingDuration(duration time.Duration)
   624  
   625  	// OnApprovalProcessingDuration records the number of seconds spent processing an approval
   626  	OnApprovalProcessingDuration(duration time.Duration)
   627  
   628  	// CheckSealingDuration records absolute time for the full sealing check by the consensus match engine
   629  	CheckSealingDuration(duration time.Duration)
   630  }
   631  
   632  type VerificationMetrics interface {
   633  	// OnBlockConsumerJobDone is invoked by block consumer whenever it is notified a job is done by a worker. It
   634  	// sets the last processed block job index.
   635  	OnBlockConsumerJobDone(uint64)
   636  	// OnChunkConsumerJobDone is invoked by chunk consumer whenever it is notified a job is done by a worker. It
   637  	// sets the last processed chunk job index.
   638  	OnChunkConsumerJobDone(uint64)
   639  	// OnExecutionResultReceivedAtAssignerEngine is called whenever a new execution result arrives
   640  	// at Assigner engine. It increments total number of received execution results.
   641  	OnExecutionResultReceivedAtAssignerEngine()
   642  
   643  	// OnVerifiableChunkReceivedAtVerifierEngine increments a counter that keeps track of number of verifiable chunks received at
   644  	// verifier engine from fetcher engine.
   645  	OnVerifiableChunkReceivedAtVerifierEngine()
   646  
   647  	// OnFinalizedBlockArrivedAtAssigner sets a gauge that keeps track of number of the latest block height arrives
   648  	// at assigner engine. Note that it assumes blocks are coming to assigner engine in strictly increasing order of their height.
   649  	OnFinalizedBlockArrivedAtAssigner(height uint64)
   650  
   651  	// OnChunksAssignmentDoneAtAssigner increments a counter that keeps track of the total number of assigned chunks to
   652  	// the verification node.
   653  	OnChunksAssignmentDoneAtAssigner(chunks int)
   654  
   655  	// OnAssignedChunkProcessedAtAssigner increments a counter that keeps track of the total number of assigned chunks pushed by
   656  	// assigner engine to the fetcher engine.
   657  	OnAssignedChunkProcessedAtAssigner()
   658  
   659  	// OnAssignedChunkReceivedAtFetcher increments a counter that keeps track of number of assigned chunks arrive at fetcher engine.
   660  	OnAssignedChunkReceivedAtFetcher()
   661  
   662  	// OnChunkDataPackRequestSentByFetcher increments a counter that keeps track of number of chunk data pack requests that fetcher engine
   663  	// sends to requester engine.
   664  	OnChunkDataPackRequestSentByFetcher()
   665  
   666  	// OnChunkDataPackRequestReceivedByRequester increments a counter that keeps track of number of chunk data pack requests
   667  	// arrive at the requester engine from the fetcher engine.
   668  	OnChunkDataPackRequestReceivedByRequester()
   669  
   670  	// OnChunkDataPackRequestDispatchedInNetwork increments a counter that keeps track of number of chunk data pack requests that the
   671  	// requester engine dispatches in the network (to the execution nodes).
   672  	OnChunkDataPackRequestDispatchedInNetworkByRequester()
   673  
   674  	// OnChunkDataPackResponseReceivedFromNetwork increments a counter that keeps track of number of chunk data pack responses that the
   675  	// requester engine receives from execution nodes (through network).
   676  	OnChunkDataPackResponseReceivedFromNetworkByRequester()
   677  
   678  	// SetMaxChunkDataPackAttemptsForNextUnsealedHeightAtRequester is invoked when a cycle of requesting chunk data packs is done by requester engine.
   679  	// It updates the maximum number of attempts made by requester engine for requesting the chunk data packs of the next unsealed height.
   680  	// The maximum is taken over the history of all chunk data packs requested during that cycle that belong to the next unsealed height.
   681  	SetMaxChunkDataPackAttemptsForNextUnsealedHeightAtRequester(attempts uint64)
   682  
   683  	// OnChunkDataPackSentToFetcher increments a counter that keeps track of number of chunk data packs sent to the fetcher engine from
   684  	// requester engine.
   685  	OnChunkDataPackSentToFetcher()
   686  
   687  	// OnChunkDataPackArrivedAtFetcher increments a counter that keeps track of number of chunk data packs arrived at fetcher engine from
   688  	// requester engine.
   689  	OnChunkDataPackArrivedAtFetcher()
   690  
   691  	// OnVerifiableChunkSentToVerifier increments a counter that keeps track of number of verifiable chunks fetcher engine sent to verifier engine.
   692  	OnVerifiableChunkSentToVerifier()
   693  
   694  	// OnResultApprovalDispatchedInNetwork increments a counter that keeps track of number of result approvals dispatched in the network
   695  	// by verifier engine.
   696  	OnResultApprovalDispatchedInNetworkByVerifier()
   697  }
   698  
   699  // LedgerMetrics provides an interface to record Ledger Storage metrics.
   700  // Ledger storage is non-linear (fork-aware) so certain metrics are averaged
   701  // and computed before emitting for better visibility
   702  type LedgerMetrics interface {
   703  	// ForestApproxMemorySize records approximate memory usage of forest (all in-memory trees)
   704  	ForestApproxMemorySize(bytes uint64)
   705  
   706  	// ForestNumberOfTrees current number of trees in a forest (in memory)
   707  	ForestNumberOfTrees(number uint64)
   708  
   709  	// LatestTrieRegCount records the number of unique register allocated (the latest created trie)
   710  	LatestTrieRegCount(number uint64)
   711  
   712  	// LatestTrieRegCountDiff records the difference between the number of unique register allocated of the latest created trie and parent trie
   713  	LatestTrieRegCountDiff(number int64)
   714  
   715  	// LatestTrieRegSize records the size of unique register allocated (the latest created trie)
   716  	LatestTrieRegSize(size uint64)
   717  
   718  	// LatestTrieRegSizeDiff records the difference between the size of unique register allocated of the latest created trie and parent trie
   719  	LatestTrieRegSizeDiff(size int64)
   720  
   721  	// LatestTrieMaxDepthTouched records the maximum depth touched of the lastest created trie
   722  	LatestTrieMaxDepthTouched(maxDepth uint16)
   723  
   724  	// UpdateCount increase a counter of performed updates
   725  	UpdateCount()
   726  
   727  	// ProofSize records a proof size
   728  	ProofSize(bytes uint32)
   729  
   730  	// UpdateValuesNumber accumulates number of updated values
   731  	UpdateValuesNumber(number uint64)
   732  
   733  	// UpdateValuesSize total size (in bytes) of updates values
   734  	UpdateValuesSize(byte uint64)
   735  
   736  	// UpdateDuration records absolute time for the update of a trie
   737  	UpdateDuration(duration time.Duration)
   738  
   739  	// UpdateDurationPerItem records update time for single value (total duration / number of updated values)
   740  	UpdateDurationPerItem(duration time.Duration)
   741  
   742  	// ReadValuesNumber accumulates number of read values
   743  	ReadValuesNumber(number uint64)
   744  
   745  	// ReadValuesSize total size (in bytes) of read values
   746  	ReadValuesSize(byte uint64)
   747  
   748  	// ReadDuration records absolute time for the read from a trie
   749  	ReadDuration(duration time.Duration)
   750  
   751  	// ReadDurationPerItem records read time for single value (total duration / number of read values)
   752  	ReadDurationPerItem(duration time.Duration)
   753  }
   754  
   755  type WALMetrics interface {
   756  	// ExecutionCheckpointSize reports the size of a checkpoint in bytes
   757  	ExecutionCheckpointSize(bytes uint64)
   758  }
   759  
   760  type RateLimitedBlockstoreMetrics interface {
   761  	BytesRead(int)
   762  }
   763  
   764  type BitswapMetrics interface {
   765  	Peers(prefix string, n int)
   766  	Wantlist(prefix string, n int)
   767  	BlobsReceived(prefix string, n uint64)
   768  	DataReceived(prefix string, n uint64)
   769  	BlobsSent(prefix string, n uint64)
   770  	DataSent(prefix string, n uint64)
   771  	DupBlobsReceived(prefix string, n uint64)
   772  	DupDataReceived(prefix string, n uint64)
   773  	MessagesReceived(prefix string, n uint64)
   774  }
   775  
   776  type ExecutionDataRequesterMetrics interface {
   777  	// ExecutionDataFetchStarted records an in-progress download
   778  	ExecutionDataFetchStarted()
   779  
   780  	// ExecutionDataFetchFinished records a completed download
   781  	ExecutionDataFetchFinished(duration time.Duration, success bool, height uint64)
   782  
   783  	// NotificationSent reports that ExecutionData received notifications were sent for a block height
   784  	NotificationSent(height uint64)
   785  
   786  	// FetchRetried reports that a download retry was processed
   787  	FetchRetried()
   788  }
   789  
   790  type ExecutionStateIndexerMetrics interface {
   791  	// BlockIndexed records metrics from indexing execution data from a single block.
   792  	BlockIndexed(height uint64, duration time.Duration, events, registers, transactionResults int)
   793  
   794  	// BlockReindexed records that a previously indexed block was indexed again.
   795  	BlockReindexed()
   796  
   797  	// InitializeLatestHeight records the latest height that has been indexed.
   798  	// This should only be used during startup. After startup, use BlockIndexed to record newly
   799  	// indexed heights.
   800  	InitializeLatestHeight(height uint64)
   801  }
   802  
   803  type RuntimeMetrics interface {
   804  	// RuntimeTransactionParsed reports the time spent parsing a single transaction
   805  	RuntimeTransactionParsed(dur time.Duration)
   806  
   807  	// RuntimeTransactionChecked reports the time spent checking a single transaction
   808  	RuntimeTransactionChecked(dur time.Duration)
   809  
   810  	// RuntimeTransactionInterpreted reports the time spent interpreting a single transaction
   811  	RuntimeTransactionInterpreted(dur time.Duration)
   812  
   813  	// RuntimeSetNumberOfAccounts Sets the total number of accounts on the network
   814  	RuntimeSetNumberOfAccounts(count uint64)
   815  
   816  	// RuntimeTransactionProgramsCacheMiss reports a programs cache miss
   817  	// during transaction execution
   818  	RuntimeTransactionProgramsCacheMiss()
   819  
   820  	// RuntimeTransactionProgramsCacheHit reports a programs cache hit
   821  	// during transaction execution
   822  	RuntimeTransactionProgramsCacheHit()
   823  }
   824  
   825  type ProviderMetrics interface {
   826  	// ChunkDataPackRequestProcessed is executed every time a chunk data pack request is picked up for processing at execution node.
   827  	// It increases the request processed counter by one.
   828  	ChunkDataPackRequestProcessed()
   829  }
   830  
   831  type ExecutionDataProviderMetrics interface {
   832  	RootIDComputed(duration time.Duration, numberOfChunks int)
   833  	AddBlobsSucceeded(duration time.Duration, totalSize uint64)
   834  	AddBlobsFailed()
   835  }
   836  
   837  type ExecutionDataRequesterV2Metrics interface {
   838  	FulfilledHeight(blockHeight uint64)
   839  	ReceiptSkipped()
   840  	RequestSucceeded(blockHeight uint64, duration time.Duration, totalSize uint64, numberOfAttempts int)
   841  	RequestFailed(duration time.Duration, retryable bool)
   842  	RequestCanceled()
   843  	ResponseDropped()
   844  }
   845  
   846  type ExecutionDataPrunerMetrics interface {
   847  	Pruned(height uint64, duration time.Duration)
   848  }
   849  
   850  type RestMetrics interface {
   851  	// Example recorder taken from:
   852  	// https://github.com/slok/go-http-metrics/blob/master/metrics/prometheus/prometheus.go
   853  	httpmetrics.Recorder
   854  	AddTotalRequests(ctx context.Context, method string, routeName string)
   855  }
   856  
   857  type GRPCConnectionPoolMetrics interface {
   858  	// TotalConnectionsInPool updates the number connections to collection/execution nodes stored in the pool, and the size of the pool
   859  	TotalConnectionsInPool(connectionCount uint, connectionPoolSize uint)
   860  
   861  	// ConnectionFromPoolReused tracks the number of times a connection to a collection/execution node is reused from the connection pool
   862  	ConnectionFromPoolReused()
   863  
   864  	// ConnectionAddedToPool tracks the number of times a collection/execution node is added to the connection pool
   865  	ConnectionAddedToPool()
   866  
   867  	// NewConnectionEstablished tracks the number of times a new grpc connection is established
   868  	NewConnectionEstablished()
   869  
   870  	// ConnectionFromPoolInvalidated tracks the number of times a cached grpc connection is invalidated and closed
   871  	ConnectionFromPoolInvalidated()
   872  
   873  	// ConnectionFromPoolUpdated tracks the number of times a cached connection is updated
   874  	ConnectionFromPoolUpdated()
   875  
   876  	// ConnectionFromPoolEvicted tracks the number of times a cached connection is evicted from the cache
   877  	ConnectionFromPoolEvicted()
   878  }
   879  
   880  type AccessMetrics interface {
   881  	RestMetrics
   882  	GRPCConnectionPoolMetrics
   883  	TransactionMetrics
   884  	BackendScriptsMetrics
   885  
   886  	// UpdateExecutionReceiptMaxHeight is called whenever we store an execution receipt from a block from a newer height
   887  	UpdateExecutionReceiptMaxHeight(height uint64)
   888  
   889  	// UpdateLastFullBlockHeight tracks the height of the last block for which all collections were received
   890  	UpdateLastFullBlockHeight(height uint64)
   891  }
   892  
   893  type ExecutionResultStats struct {
   894  	ComputationUsed                 uint64
   895  	MemoryUsed                      uint64
   896  	EventCounts                     int
   897  	EventSize                       int
   898  	NumberOfRegistersTouched        int
   899  	NumberOfBytesWrittenToRegisters int
   900  	NumberOfCollections             int
   901  	NumberOfTransactions            int
   902  }
   903  
   904  func (stats *ExecutionResultStats) Merge(other ExecutionResultStats) {
   905  	stats.ComputationUsed += other.ComputationUsed
   906  	stats.MemoryUsed += other.MemoryUsed
   907  	stats.EventCounts += other.EventCounts
   908  	stats.EventSize += other.EventSize
   909  	stats.NumberOfRegistersTouched += other.NumberOfRegistersTouched
   910  	stats.NumberOfBytesWrittenToRegisters += other.NumberOfBytesWrittenToRegisters
   911  	stats.NumberOfCollections += other.NumberOfCollections
   912  	stats.NumberOfTransactions += other.NumberOfTransactions
   913  }
   914  
   915  type ExecutionMetrics interface {
   916  	LedgerMetrics
   917  	RuntimeMetrics
   918  	ProviderMetrics
   919  	WALMetrics
   920  
   921  	// StartBlockReceivedToExecuted starts a span to trace the duration of a block
   922  	// from being received for execution to execution being finished
   923  	StartBlockReceivedToExecuted(blockID flow.Identifier)
   924  
   925  	// FinishBlockReceivedToExecuted finishes a span to trace the duration of a block
   926  	// from being received for execution to execution being finished
   927  	FinishBlockReceivedToExecuted(blockID flow.Identifier)
   928  
   929  	// ExecutionStorageStateCommitment reports the storage size of a state commitment in bytes
   930  	ExecutionStorageStateCommitment(bytes int64)
   931  
   932  	// ExecutionLastExecutedBlockHeight reports last executed block height
   933  	ExecutionLastExecutedBlockHeight(height uint64)
   934  
   935  	// ExecutionLastFinalizedExecutedBlockHeight reports last finalized and executed block height
   936  	ExecutionLastFinalizedExecutedBlockHeight(height uint64)
   937  
   938  	// ExecutionBlockExecuted reports the total time and computation spent on executing a block
   939  	ExecutionBlockExecuted(dur time.Duration, stats ExecutionResultStats)
   940  
   941  	// ExecutionBlockExecutionEffortVectorComponent reports the unweighted effort of given ComputationKind at block level
   942  	ExecutionBlockExecutionEffortVectorComponent(string, uint)
   943  
   944  	// ExecutionBlockCachedPrograms reports the number of cached programs at the end of a block
   945  	ExecutionBlockCachedPrograms(programs int)
   946  
   947  	// ExecutionCollectionExecuted reports the total time and computation spent on executing a collection
   948  	ExecutionCollectionExecuted(dur time.Duration, stats ExecutionResultStats)
   949  
   950  	// ExecutionTransactionExecuted reports stats on executing a single transaction
   951  	ExecutionTransactionExecuted(
   952  		dur time.Duration,
   953  		numTxnConflictRetries int,
   954  		compUsed uint64,
   955  		memoryUsed uint64,
   956  		eventCounts int,
   957  		eventSize int,
   958  		failed bool)
   959  
   960  	// ExecutionChunkDataPackGenerated reports stats on chunk data pack generation
   961  	ExecutionChunkDataPackGenerated(proofSize, numberOfTransactions int)
   962  
   963  	// ExecutionScriptExecuted reports the time and memory spent on executing an script
   964  	ExecutionScriptExecuted(dur time.Duration, compUsed, memoryUsed, memoryEstimate uint64)
   965  
   966  	// ExecutionCollectionRequestSent reports when a request for a collection is sent to a collection node
   967  	ExecutionCollectionRequestSent()
   968  
   969  	// Unused
   970  	ExecutionCollectionRequestRetried()
   971  
   972  	// ExecutionSync reports when the state syncing is triggered or stopped.
   973  	ExecutionSync(syncing bool)
   974  
   975  	// Upload metrics
   976  	ExecutionBlockDataUploadStarted()
   977  	ExecutionBlockDataUploadFinished(dur time.Duration)
   978  	ExecutionComputationResultUploaded()
   979  	ExecutionComputationResultUploadRetried()
   980  
   981  	UpdateCollectionMaxHeight(height uint64)
   982  }
   983  
   984  type BackendScriptsMetrics interface {
   985  	// ScriptExecuted records the round trip time while executing a script
   986  	ScriptExecuted(dur time.Duration, size int)
   987  
   988  	// ScriptExecutionErrorLocal records script execution failures from local execution
   989  	ScriptExecutionErrorLocal()
   990  
   991  	// ScriptExecutionErrorOnExecutionNode records script execution failures on Execution Nodes
   992  	ScriptExecutionErrorOnExecutionNode()
   993  
   994  	// ScriptExecutionResultMismatch records script execution result mismatches between local and
   995  	// execution nodes
   996  	ScriptExecutionResultMismatch()
   997  
   998  	// ScriptExecutionResultMatch records script execution result matches between local and
   999  	// execution nodes
  1000  	ScriptExecutionResultMatch()
  1001  
  1002  	// ScriptExecutionErrorMismatch records script execution error mismatches between local and
  1003  	// execution nodes
  1004  	ScriptExecutionErrorMismatch()
  1005  
  1006  	// ScriptExecutionErrorMatch records script execution error matches between local and
  1007  	// execution nodes
  1008  	ScriptExecutionErrorMatch()
  1009  
  1010  	// ScriptExecutionNotIndexed records script execution matches where data for the block is not
  1011  	// indexed locally yet
  1012  	ScriptExecutionNotIndexed()
  1013  }
  1014  
  1015  type TransactionMetrics interface {
  1016  	// Record the round trip time while getting a transaction result
  1017  	TransactionResultFetched(dur time.Duration, size int)
  1018  
  1019  	// TransactionReceived starts tracking of transaction execution/finalization/sealing
  1020  	TransactionReceived(txID flow.Identifier, when time.Time)
  1021  
  1022  	// TransactionFinalized reports the time spent between the transaction being received and finalized. Reporting only
  1023  	// works if the transaction was earlier added as received.
  1024  	TransactionFinalized(txID flow.Identifier, when time.Time)
  1025  
  1026  	// TransactionExecuted reports the time spent between the transaction being received and executed. Reporting only
  1027  	// works if the transaction was earlier added as received.
  1028  	TransactionExecuted(txID flow.Identifier, when time.Time)
  1029  
  1030  	// TransactionExpired tracks number of expired transactions
  1031  	TransactionExpired(txID flow.Identifier)
  1032  
  1033  	// TransactionSubmissionFailed should be called whenever we try to submit a transaction and it fails
  1034  	TransactionSubmissionFailed()
  1035  }
  1036  
  1037  type PingMetrics interface {
  1038  	// NodeReachable tracks the round trip time in milliseconds taken to ping a node
  1039  	// The nodeInfo provides additional information about the node such as the name of the node operator
  1040  	NodeReachable(node *flow.Identity, nodeInfo string, rtt time.Duration)
  1041  
  1042  	// NodeInfo tracks the software version, sealed height and hotstuff view of a node
  1043  	NodeInfo(node *flow.Identity, nodeInfo string, version string, sealedHeight uint64, hotstuffCurView uint64)
  1044  }
  1045  
  // HeroCacheMetrics is the set of callbacks through which a HeroCache instance reports its
  // activity: put attempts and their outcomes, removals, read hits and misses, and ejections.
  1046  type HeroCacheMetrics interface {
  1047  	// BucketAvailableSlots keeps track of the number of available slots in the buckets of the cache.
        	// NOTE(review): both uint64 parameters are unnamed in this declaration; presumably they are
        	// (available slots, total slots) -- confirm against the implementations.
  1048  	BucketAvailableSlots(uint64, uint64)
  1049  
  1050  	// OnKeyPutAttempt is called whenever a new (key, value) pair is attempted to be put in the cache.
  1051  	// It does not reflect whether the put was successful or not.
  1052  	// A (key, value) pair put attempt may fail if the cache is full, or the key already exists.
        	// NOTE(review): size is not described here; presumably it is the number of entries in the
        	// cache around the time of the attempt -- confirm against the implementations.
  1053  	OnKeyPutAttempt(size uint32)
  1054  
  1055  	// OnKeyPutSuccess is called whenever a new (key, entity) pair is successfully added to the cache.
        	// NOTE(review): size is not described here; presumably it is the number of entries in the
        	// cache around the time of the insertion -- confirm against the implementations.
  1056  	OnKeyPutSuccess(size uint32)
  1057  
  1058  	// OnKeyPutDrop is called whenever a new (key, entity) pair is dropped because the cache is full.
  1059  	OnKeyPutDrop()
  1060  
  1061  	// OnKeyPutDeduplicated tracks the total number of unsuccessful writes caused by adding a duplicate key to the cache.
  1062  	// A duplicate key is dropped by the cache when it is written to the cache.
  1063  	// Note: in the context of HeroCache, the key corresponds to the identifier of its entity. Hence, a duplicate key
  1064  	// corresponds to a duplicate entity.
  1065  	OnKeyPutDeduplicated()
  1066  
  1067  	// OnKeyRemoved is called whenever a (key, entity) pair is removed from the cache.
        	// NOTE(review): size is not described here; presumably it is the number of entries in the
        	// cache around the time of the removal -- confirm against the implementations.
  1068  	OnKeyRemoved(size uint32)
  1069  
  1070  	// OnKeyGetSuccess tracks the total number of successful read queries.
  1071  	// A read query is successful if the entity corresponding to its key is available in the cache.
  1072  	// Note: in the context of HeroCache, the key corresponds to the identifier of its entity.
  1073  	OnKeyGetSuccess()
  1074  
  1075  	// OnKeyGetFailure tracks the total number of unsuccessful read queries.
  1076  	// A read query is unsuccessful if the entity corresponding to its key is not available in the cache.
  1077  	// Note: in the context of HeroCache, the key corresponds to the identifier of its entity.
  1078  	OnKeyGetFailure()
  1079  
  1080  	// OnEntityEjectionDueToFullCapacity is called whenever adding a new (key, entity) pair to the cache results in
  1081  	// the ejection of another (key', entity') pair. This normally happens -- and is expected -- when the cache is full.
  1082  	// Note: in the context of HeroCache, the key corresponds to the identifier of its entity.
  1083  	OnEntityEjectionDueToFullCapacity()
  1084  
  1085  	// OnEntityEjectionDueToEmergency is called whenever a bucket is found full and all of its keys are valid, i.e.,
  1086  	// each key belongs to an existing (key, entity) pair.
  1087  	// Hence, adding a new key to that bucket will replace the oldest valid key inside that bucket.
  1088  	// Note: in the context of HeroCache, the key corresponds to the identifier of its entity.
  1089  	OnEntityEjectionDueToEmergency()
  1090  }
  1091  
  // ChainSyncMetrics is the metrics interface for chain synchronization. It covers pruned
  // blocks as well as requested ranges and batches, using the Status, Range and Batch types
  // of the chainsync model package.
  1092  type ChainSyncMetrics interface {
  1093  	// PrunedBlockById records a pruned block. The requested and received times might be zero values.
        	// NOTE(review): going by the name, this is for blocks that were requested by ID, and the
        	// times mentioned above are carried by status -- confirm against chainsync.Status.
  1094  	PrunedBlockById(status *chainsync.Status)
  1095  
        	// PrunedBlockByHeight is the counterpart of PrunedBlockById.
        	// NOTE(review): going by the name, this is for blocks that were requested by height -- confirm.
  1096  	PrunedBlockByHeight(status *chainsync.Status)
  1097  
  1098  	// PrunedBlocks reports pruning counts. totalByHeight and totalById are the number of blocks pruned
  1099  	// for blocks requested by height and by id; storedByHeight and storedById are the number of blocks
        	// still stored by height and id.
  1100  	PrunedBlocks(totalByHeight, totalById, storedByHeight, storedById int)
  1101  
        	// RangeRequested is passed a chainsync.Range.
        	// NOTE(review): going by the name, ran is a range that has just been requested -- confirm with callers.
  1102  	RangeRequested(ran chainsync.Range)
  1103  
        	// BatchRequested is passed a chainsync.Batch.
        	// NOTE(review): going by the name, batch is a batch that has just been requested -- confirm with callers.
  1104  	BatchRequested(batch chainsync.Batch)
  1105  }
  1106  
  // DHTMetrics reports changes to the set of peers in a routing table.
  // NOTE(review): going by the type name, this is the routing table of the node's DHT -- confirm.
  1107  type DHTMetrics interface {
        	// RoutingTablePeerAdded takes no arguments; going by the name, it is called once per
        	// peer added to the routing table -- confirm with callers.
  1108  	RoutingTablePeerAdded()
        	// RoutingTablePeerRemoved takes no arguments; going by the name, it is called once per
        	// peer removed from the routing table -- confirm with callers.
  1109  	RoutingTablePeerRemoved()
  1110  }
  1111  
  // CollectionExecutedMetric receives notifications about collections, blocks and execution
  // receipts, plus updates of the last full block height.
  // NOTE(review): the descriptions below are derived from the method names and signatures only;
  // confirm the exact call sites and semantics against the implementations.
  1112  type CollectionExecutedMetric interface {
        	// CollectionFinalized is passed the light collection that, per the name, has been finalized.
  1113  	CollectionFinalized(light flow.LightCollection)
        	// CollectionExecuted is passed the light collection that, per the name, has been executed.
  1114  	CollectionExecuted(light flow.LightCollection)
        	// BlockFinalized is passed the block that, per the name, has been finalized.
  1115  	BlockFinalized(block *flow.Block)
        	// ExecutionReceiptReceived is passed the execution receipt that, per the name, has been received.
  1116  	ExecutionReceiptReceived(r *flow.ExecutionReceipt)
        	// UpdateLastFullBlockHeight is passed the new value of the last full block height.
  1117  	UpdateLastFullBlockHeight(height uint64)
  1118  }
  1119  
  // MachineAccountMetrics reports the state of the machine account: its current balance,
  // the recommended minimum balance, and whether a critical misconfiguration has been detected.
  1120  type MachineAccountMetrics interface {
  1121  	// AccountBalance reports the current balance of the machine account.
  1122  	AccountBalance(bal float64)
  1123  	// RecommendedMinBalance reports the recommended minimum balance. If the actual balance
  1124  	// falls below this level, it must be refilled.
  1125  	// NOTE: Operators should alert on `AccountBalance < RecommendedMinBalance`.
  1126  	RecommendedMinBalance(bal float64)
  1127  	// IsMisconfigured reports whether a critical misconfiguration has been detected.
  1128  	// NOTE: Operators should alert on non-zero values reported here.
        	// NOTE(review): the parameter is a bool; presumably true is exported as a non-zero value -- confirm.
  1129  	IsMisconfigured(misconfigured bool)
  1130  }