github.com/onflow/flow-go@v0.33.17/module/metrics.go (about)

     1  package module
     2  
     3  import (
     4  	"context"
     5  	"time"
     6  
     7  	"github.com/libp2p/go-libp2p/core/peer"
     8  	rcmgr "github.com/libp2p/go-libp2p/p2p/host/resource-manager"
     9  	httpmetrics "github.com/slok/go-http-metrics/metrics"
    10  
    11  	"github.com/onflow/flow-go/model/chainsync"
    12  	"github.com/onflow/flow-go/model/cluster"
    13  	"github.com/onflow/flow-go/model/flow"
    14  	"github.com/onflow/flow-go/network/channels"
    15  	p2pmsg "github.com/onflow/flow-go/network/p2p/message"
    16  )
    17  
    18  type EntriesFunc func() uint
    19  
    20  // ResolverMetrics encapsulates the metrics collectors for dns resolver module of the networking layer.
    21  type ResolverMetrics interface {
    22  	// DNSLookupDuration tracks the time spent to resolve a DNS address.
    23  	DNSLookupDuration(duration time.Duration)
    24  
    25  	// OnDNSCacheMiss tracks the total number of dns requests resolved through looking up the network.
    26  	OnDNSCacheMiss()
    27  
    28  	// OnDNSCacheHit tracks the total number of dns requests resolved through the cache without
    29  	// looking up the network.
    30  	OnDNSCacheHit()
    31  
    32  	// OnDNSCacheInvalidated is called whenever dns cache is invalidated for an entry
    33  	OnDNSCacheInvalidated()
    34  
    35  	// OnDNSLookupRequestDropped tracks the number of dns lookup requests that are dropped due to a full queue
    36  	OnDNSLookupRequestDropped()
    37  }
    38  
    39  // NetworkSecurityMetrics metrics related to network protection.
    40  type NetworkSecurityMetrics interface {
    41  	// OnUnauthorizedMessage tracks the number of unauthorized messages seen on the network.
    42  	OnUnauthorizedMessage(role, msgType, topic, offense string)
    43  
    44  	// OnRateLimitedPeer tracks the number of rate limited unicast messages seen on the network.
    45  	OnRateLimitedPeer(pid peer.ID, role, msgType, topic, reason string)
    46  
    47  	// OnViolationReportSkipped tracks the number of slashing violations consumer violations that were not
    48  	// reported for misbehavior when the identity of the sender not known.
    49  	OnViolationReportSkipped()
    50  }
    51  
    52  // GossipSubRpcInspectorMetrics encapsulates the metrics collectors for GossipSub RPC Inspector module of the networking layer.
    53  // The RPC inspector is the entry point of the GossipSub protocol. It inspects the incoming RPC messages and decides
    54  // whether to accept, prune, or reject the RPC message.
    55  // The GossipSubRpcInspectorMetrics tracks the number of RPC messages received by the local node from other nodes over
    56  // the GossipSub protocol. It also tracks the number of control messages included in the RPC messages, i.e., IHAVE, IWANT,
    57  // GRAFT, PRUNE. It also tracks the number of actual messages included in the RPC messages.
    58  // The GossipSubRpcInspectorMetrics differs from LocalGossipSubRouterMetrics in that the former tracks the messages
    59  // received by the local node from other nodes over the GossipSub protocol but may not all be accepted by the local node,
    60  // e.g., due to RPC pruning or throttling; while the latter tracks the local node's view of the GossipSub protocol, i.e., entirely
    61  // containing the messages that are accepted by the local node (either as whole RPC or only for the control messages).
    62  // Having this distinction is useful for debugging and troubleshooting the GossipSub protocol, for example, the number of
    63  // messages received by the local node from other nodes over the GossipSub protocol may be much higher than the number
    64  // of messages accepted by the local node, which may indicate that the local node is throttling the incoming messages.
    65  type GossipSubRpcInspectorMetrics interface {
    66  	// OnIWantMessageIDsReceived tracks the number of message ids received by the node from other nodes on an RPC.
    67  	// Note: this function is called on each IWANT message received by the node, not on each message id received.
    68  	OnIWantMessageIDsReceived(msgIdCount int)
    69  
    70  	// OnIHaveMessageIDsReceived tracks the number of message ids received by the node from other nodes on an iHave message.
    71  	// This function is called on each iHave message received by the node.
    72  	// Args:
    73  	// - channel: the channel on which the iHave message was received.
    74  	// - msgIdCount: the number of message ids received on the iHave message.
    75  	OnIHaveMessageIDsReceived(channel string, msgIdCount int)
    76  
    77  	// OnIncomingRpcReceived tracks the number of RPC messages received by the node.
    78  	// Args:
    79  	// 	iHaveCount: the number of iHAVE messages included in the RPC.
    80  	// 	iWantCount: the number of iWANT messages included in the RPC.
    81  	// 	graftCount: the number of GRAFT messages included in the RPC.
    82  	// 	pruneCount: the number of PRUNE messages included in the RPC.
    83  	// 	msgCount: the number of publish messages included in the RPC.
    84  	OnIncomingRpcReceived(iHaveCount, iWantCount, graftCount, pruneCount, msgCount int)
    85  }
    86  
    87  // LocalGossipSubRouterMetrics encapsulates the metrics collectors for GossipSub router of the local node.
    88  // It gives a lens into the local GossipSub node's view of the GossipSub protocol.
    89  // LocalGossipSubRouterMetrics differs from GossipSubRpcInspectorMetrics in that the former tracks the local node's view
    90  // of the GossipSub protocol, while the latter tracks the messages received by the local node from other nodes over the
    91  // GossipSub protocol but may not all be accepted by the local node, e.g., due to RPC pruning or throttling.
    92  // Having this distinction is useful for debugging and troubleshooting the GossipSub protocol, for example, the number of
    93  // messages received by the local node from other nodes over the GossipSub protocol may be much higher than the number
    94  // of messages accepted by the local node, which may indicate that the local node is throttling the incoming messages.
    95  type LocalGossipSubRouterMetrics interface {
    96  	// OnLocalMeshSizeUpdated tracks the size of the local mesh for a topic.
    97  	OnLocalMeshSizeUpdated(topic string, size int)
    98  
    99  	// OnPeerAddedToProtocol is called when the local node receives a stream from a peer on a gossipsub-related protocol.
   100  	// Args:
   101  	// 	protocol: the protocol name that the peer is connected to.
   102  	OnPeerAddedToProtocol(protocol string)
   103  
   104  	// OnPeerRemovedFromProtocol is called when the local considers a remote peer blacklisted or unavailable.
   105  	OnPeerRemovedFromProtocol()
   106  
   107  	// OnLocalPeerJoinedTopic is called when the local node subscribes to a gossipsub topic.
   108  	OnLocalPeerJoinedTopic()
   109  
   110  	// OnLocalPeerLeftTopic is called when the local node unsubscribes from a gossipsub topic.
   111  	OnLocalPeerLeftTopic()
   112  
   113  	// OnPeerGraftTopic is called when the local node receives a GRAFT message from a remote peer on a topic.
   114  	// Note: the received GRAFT at this point is considered passed the RPC inspection, and is accepted by the local node.
   115  	OnPeerGraftTopic(topic string)
   116  
   117  	// OnPeerPruneTopic is called when the local node receives a PRUNE message from a remote peer on a topic.
   118  	// Note: the received PRUNE at this point is considered passed the RPC inspection, and is accepted by the local node.
   119  	OnPeerPruneTopic(topic string)
   120  
   121  	// OnMessageEnteredValidation is called when a received pubsub message enters the validation pipeline. It is the
   122  	// internal validation pipeline of GossipSub protocol. The message may be rejected or accepted by the validation
   123  	// pipeline.
   124  	OnMessageEnteredValidation(size int)
   125  
   126  	// OnMessageRejected is called when a received pubsub message is rejected by the validation pipeline.
   127  	// Args:
   128  	//
   129  	//	reason: the reason for rejection.
   130  	// 	size: the size of the message in bytes.
   131  	OnMessageRejected(size int, reason string)
   132  
   133  	// OnMessageDuplicate is called when a received pubsub message is a duplicate of a previously received message, and
   134  	// is dropped.
   135  	// Args:
   136  	// 	size: the size of the message in bytes.
   137  	OnMessageDuplicate(size int)
   138  
   139  	// OnPeerThrottled is called when a peer is throttled by the local node, i.e., the local node is not accepting any
   140  	// pubsub message from the peer but may still accept control messages.
   141  	OnPeerThrottled()
   142  
   143  	// OnRpcReceived is called when an RPC message is received by the local node. The received RPC is considered
   144  	// passed the RPC inspection, and is accepted by the local node.
   145  	// Args:
   146  	// 	msgCount: the number of messages included in the RPC.
   147  	// 	iHaveCount: the number of iHAVE messages included in the RPC.
   148  	// 	iWantCount: the number of iWANT messages included in the RPC.
   149  	// 	graftCount: the number of GRAFT messages included in the RPC.
   150  	// 	pruneCount: the number of PRUNE messages included in the RPC.
   151  	OnRpcReceived(msgCount int, iHaveCount int, iWantCount int, graftCount int, pruneCount int)
   152  
   153  	// OnRpcSent is called when an RPC message is sent by the local node.
   154  	// Note: the sent RPC is considered passed the RPC inspection, and is accepted by the local node.
   155  	// Args:
   156  	// 	msgCount: the number of messages included in the RPC.
   157  	// 	iHaveCount: the number of iHAVE messages included in the RPC.
   158  	// 	iWantCount: the number of iWANT messages included in the RPC.
   159  	// 	graftCount: the number of GRAFT messages included in the RPC.
   160  	// 	pruneCount: the number of PRUNE messages included in the RPC.
   161  	OnRpcSent(msgCount int, iHaveCount int, iWantCount int, graftCount int, pruneCount int)
   162  
   163  	// OnOutboundRpcDropped is called when an outbound RPC message is dropped by the local node, typically because the local node
   164  	// outbound message queue is full; or the RPC is big and the local node cannot fragment it.
   165  	OnOutboundRpcDropped()
   166  
   167  	// OnUndeliveredMessage is called when a message is not delivered at least one subscriber of the topic, for example when
   168  	// the subscriber is too slow to process the message.
   169  	OnUndeliveredMessage()
   170  
   171  	// OnMessageDeliveredToAllSubscribers is called when a message is delivered to all subscribers of the topic.
   172  	OnMessageDeliveredToAllSubscribers(size int)
   173  }
   174  
   175  // UnicastManagerMetrics unicast manager metrics.
   176  type UnicastManagerMetrics interface {
   177  	// OnStreamCreated tracks the overall time it takes to create a stream successfully and the number of retry attempts.
   178  	OnStreamCreated(duration time.Duration, attempts int)
   179  	// OnStreamCreationFailure tracks the amount of time taken and number of retry attempts used when the unicast manager fails to create a stream.
   180  	OnStreamCreationFailure(duration time.Duration, attempts int)
   181  	// OnPeerDialed tracks the time it takes to dial a peer during stream creation and the number of retry attempts before a peer
   182  	// is dialed successfully.
   183  	OnPeerDialed(duration time.Duration, attempts int)
   184  	// OnPeerDialFailure tracks the amount of time taken and number of retry attempts used when the unicast manager cannot dial a peer
   185  	// to establish the initial connection between the two.
   186  	OnPeerDialFailure(duration time.Duration, attempts int)
   187  	// OnStreamEstablished tracks the time it takes to create a stream successfully on the available open connection during stream
   188  	// creation and the number of retry attempts.
   189  	OnStreamEstablished(duration time.Duration, attempts int)
   190  	// OnEstablishStreamFailure tracks the amount of time taken and number of retry attempts used when the unicast manager cannot establish
   191  	// a stream on the open connection between two peers.
   192  	OnEstablishStreamFailure(duration time.Duration, attempts int)
   193  
   194  	// OnDialRetryBudgetUpdated tracks the history of the dial retry budget updates.
   195  	OnDialRetryBudgetUpdated(budget uint64)
   196  
   197  	// OnStreamCreationRetryBudgetUpdated tracks the history of the stream creation retry budget updates.
   198  	OnStreamCreationRetryBudgetUpdated(budget uint64)
   199  
   200  	// OnDialRetryBudgetResetToDefault tracks the number of times the dial retry budget is reset to default.
   201  	OnDialRetryBudgetResetToDefault()
   202  
   203  	// OnStreamCreationRetryBudgetResetToDefault tracks the number of times the stream creation retry budget is reset to default.
   204  	OnStreamCreationRetryBudgetResetToDefault()
   205  }
   206  
   207  type GossipSubMetrics interface {
   208  	GossipSubScoringMetrics
   209  	GossipSubRpcInspectorMetrics
   210  	LocalGossipSubRouterMetrics
   211  	GossipSubRpcValidationInspectorMetrics
   212  }
   213  
   214  type LibP2PMetrics interface {
   215  	GossipSubMetrics
   216  	ResolverMetrics
   217  	DHTMetrics
   218  	rcmgr.MetricsReporter
   219  	LibP2PConnectionMetrics
   220  	UnicastManagerMetrics
   221  }
   222  
   223  // GossipSubScoringMetrics encapsulates the metrics collectors for the peer scoring module of GossipSub protocol.
   224  // It tracks the scores of the peers in the local mesh and the different factors that contribute to the score of a peer.
   225  // It also tracks the scores of the topics in the local mesh and the different factors that contribute to the score of a topic.
   226  type GossipSubScoringMetrics interface {
   227  	// OnOverallPeerScoreUpdated tracks the overall score of peers in the local mesh.
   228  	OnOverallPeerScoreUpdated(float64)
   229  	// OnAppSpecificScoreUpdated tracks the application specific score of peers in the local mesh.
   230  	OnAppSpecificScoreUpdated(float64)
   231  	// OnIPColocationFactorUpdated tracks the IP colocation factor of peers in the local mesh.
   232  	OnIPColocationFactorUpdated(float64)
   233  	// OnBehaviourPenaltyUpdated tracks the behaviour penalty of peers in the local mesh.
   234  	OnBehaviourPenaltyUpdated(float64)
   235  	// OnTimeInMeshUpdated tracks the time in mesh factor of peers in the local mesh for a given topic.
   236  	OnTimeInMeshUpdated(channels.Topic, time.Duration)
   237  	// OnFirstMessageDeliveredUpdated tracks the first message delivered factor of peers in the local mesh for a given topic.
   238  	OnFirstMessageDeliveredUpdated(channels.Topic, float64)
   239  	// OnMeshMessageDeliveredUpdated tracks the mesh message delivered factor of peers in the local mesh for a given topic.
   240  	OnMeshMessageDeliveredUpdated(channels.Topic, float64)
   241  	// OnInvalidMessageDeliveredUpdated tracks the invalid message delivered factor of peers in the local mesh for a given topic.
   242  	OnInvalidMessageDeliveredUpdated(channels.Topic, float64)
   243  	// SetWarningStateCount tracks the warning score state of peers in the local mesh. It updates the total number of
   244  	// peers in the local mesh that are in the warning state based on their score.
   245  	SetWarningStateCount(uint)
   246  }
   247  
   248  // GossipSubRpcValidationInspectorMetrics encapsulates the metrics collectors for the gossipsub rpc validation control message inspectors.
   249  type GossipSubRpcValidationInspectorMetrics interface {
   250  	GossipSubRpcInspectorMetrics
   251  
   252  	// AsyncProcessingStarted increments the metric tracking the number of inspect message request being processed by workers in the rpc validator worker pool.
   253  	AsyncProcessingStarted()
   254  	// AsyncProcessingFinished tracks the time spent by a rpc validation inspector worker to process an inspect message request asynchronously and decrements the metric tracking
   255  	// the number of inspect message requests  being processed asynchronously by the rpc validation inspector workers.
   256  	AsyncProcessingFinished(duration time.Duration)
   257  
   258  	// OnIHaveControlMessageIdsTruncated tracks the number of times message ids on an iHave message were truncated.
   259  	// Note that this function is called only when the message ids are truncated from an iHave message, not when the iHave message itself is truncated.
   260  	// This is different from the OnControlMessagesTruncated function which is called when a slice of control messages truncated from an RPC with all their message ids.
   261  	// Args:
   262  	//
   263  	//	diff: the number of actual messages truncated.
   264  	OnIHaveControlMessageIdsTruncated(diff int)
   265  
   266  	// OnIWantControlMessageIdsTruncated tracks the number of times message ids on an iWant message were truncated.
   267  	// Note that this function is called only when the message ids are truncated from an iWant message, not when the iWant message itself is truncated.
   268  	// This is different from the OnControlMessagesTruncated function which is called when a slice of control messages truncated from an RPC with all their message ids.
   269  	// Args:
   270  	// 	diff: the number of actual messages truncated.
   271  	OnIWantControlMessageIdsTruncated(diff int)
   272  
   273  	// OnControlMessagesTruncated tracks the number of times a slice of control messages is truncated from an RPC with all their included message ids.
   274  	// Args:
   275  	//
   276  	//	messageType: the type of the control message that was truncated
   277  	//	diff: the number of control messages truncated.
   278  	OnControlMessagesTruncated(messageType p2pmsg.ControlMessageType, diff int)
   279  
   280  	// OnIWantMessagesInspected tracks the number of duplicate and cache miss message ids received by the node on iWant messages at the end of the async inspection iWants
   281  	// across one RPC, regardless of the result of the inspection.
   282  	//
   283  	//	duplicateCount: the total number of duplicate message ids received by the node on the iWant messages at the end of the async inspection of the RPC.
   284  	//	cacheMissCount: the total number of cache miss message ids received by the node on the iWant message at the end of the async inspection of the RPC.
   285  	OnIWantMessagesInspected(duplicateCount int, cacheMissCount int)
   286  
   287  	// OnIWantDuplicateMessageIdsExceedThreshold tracks the number of times that async inspection of iWant messages failed due to the total number of duplicate message ids
   288  	// received by the node on the iWant messages of a single RPC exceeding the threshold, which results in a misbehaviour report.
   289  	OnIWantDuplicateMessageIdsExceedThreshold()
   290  
   291  	// OnIWantCacheMissMessageIdsExceedThreshold tracks the number of times that async inspection of iWant messages failed due to the total
   292  	// number of cache miss message ids received by the node on the iWant messages of a single RPC exceeding the threshold, which results in a misbehaviour report.
   293  	OnIWantCacheMissMessageIdsExceedThreshold()
   294  
   295  	// OnIHaveMessagesInspected is called at the end of the async inspection of iHave messages of a single RPC, regardless of the result of the inspection.
   296  	// It tracks the number of duplicate topic ids and duplicate message ids received by the node on the iHave messages of that single RPC at the end of the async inspection iHaves.
   297  	// Args:
   298  	//
   299  	//	duplicateTopicIds: the total number of duplicate topic ids received by the node on the iHave messages at the end of the async inspection of the RPC.
   300  	//	duplicateMessageIds: the number of duplicate message ids received by the node on the iHave messages at the end of the async inspection of the RPC.
   301  	OnIHaveMessagesInspected(duplicateTopicIds int, duplicateMessageIds int)
   302  
   303  	// OnIHaveDuplicateTopicIdsExceedThreshold tracks the number of times that the async inspection of iHave messages of a single RPC failed due to the total number of duplicate topic ids
   304  	// received by the node on the iHave messages of that RPC exceeding the threshold, which results in a misbehaviour report.
   305  	OnIHaveDuplicateTopicIdsExceedThreshold()
   306  
   307  	// OnIHaveDuplicateMessageIdsExceedThreshold tracks the number of times that the async inspection of iHave messages of a single RPC failed due to the total number of duplicate message ids
   308  	// received by the node on an iHave message exceeding the threshold, which results in a misbehaviour report.
   309  	OnIHaveDuplicateMessageIdsExceedThreshold()
   310  
   311  	// OnInvalidTopicIdDetectedForControlMessage tracks the number of times that the async inspection of a control message type on a single RPC failed due to an invalid topic id.
   312  	// Args:
   313  	// - messageType: the type of the control message that was truncated.
   314  	OnInvalidTopicIdDetectedForControlMessage(messageType p2pmsg.ControlMessageType)
   315  
   316  	// OnActiveClusterIDsNotSetErr tracks the number of times that the async inspection of a control message type on a single RPC failed due to active cluster ids not set inspection failure.
   317  	// This is not causing a misbehaviour report.
   318  	OnActiveClusterIDsNotSetErr()
   319  
   320  	// OnUnstakedPeerInspectionFailed tracks the number of times that the async inspection of a control message type on a single RPC failed due to unstaked peer inspection failure.
   321  	// This is not causing a misbehaviour report.
   322  	OnUnstakedPeerInspectionFailed()
   323  
   324  	// OnInvalidControlMessageNotificationSent tracks the number of times that the async inspection of a control message failed and resulted in dissemination of an invalid control message was sent.
   325  	OnInvalidControlMessageNotificationSent()
   326  
   327  	// OnPublishMessagesInspectionErrorExceedsThreshold tracks the number of times that async inspection of publish messages failed due to the number of errors.
   328  	OnPublishMessagesInspectionErrorExceedsThreshold()
   329  
   330  	// OnPruneDuplicateTopicIdsExceedThreshold tracks the number of times that the async inspection of prune messages for an RPC failed due to the number of duplicate topic ids
   331  	// received by the node on prune messages of the same RPC excesses threshold, which results in a misbehaviour report.
   332  	OnPruneDuplicateTopicIdsExceedThreshold()
   333  
   334  	// OnPruneMessageInspected is called at the end of the async inspection of prune messages of the RPC, regardless of the result of the inspection.
   335  	// Args:
   336  	// 	duplicateTopicIds: the number of duplicate topic ids received by the node on the prune messages of the RPC at the end of the async inspection prunes.
   337  	OnPruneMessageInspected(duplicateTopicIds int)
   338  
   339  	// OnGraftDuplicateTopicIdsExceedThreshold tracks the number of times that the async inspection of the graft messages of a single RPC failed due to the number of duplicate topic ids
   340  	// received by the node on graft messages of the same RPC excesses threshold, which results in a misbehaviour report.
   341  	OnGraftDuplicateTopicIdsExceedThreshold()
   342  
   343  	// OnGraftMessageInspected is called at the end of the async inspection of graft messages of a single RPC, regardless of the result of the inspection.
   344  	// Args:
   345  	// 	duplicateTopicIds: the number of duplicate topic ids received by the node on the graft messages at the end of the async inspection of a single RPC.
   346  	OnGraftMessageInspected(duplicateTopicIds int)
   347  
   348  	// OnPublishMessageInspected is called at the end of the async inspection of publish messages of a single RPC, regardless of the result of the inspection.
   349  	// It tracks the total number of errors detected during the async inspection of the rpc together with their individual breakdown.
   350  	// Args:
   351  	// - errCount: the number of errors that occurred during the async inspection of publish messages.
   352  	// - invalidTopicIdsCount: the number of times that an invalid topic id was detected during the async inspection of publish messages.
   353  	// - invalidSubscriptionsCount: the number of times that an invalid subscription was detected during the async inspection of publish messages.
   354  	// - invalidSendersCount: the number of times that an invalid sender was detected during the async inspection of publish messages.
   355  	OnPublishMessageInspected(totalErrCount int, invalidTopicIdsCount int, invalidSubscriptionsCount int, invalidSendersCount int)
   356  }
   357  
   358  // NetworkInboundQueueMetrics encapsulates the metrics collectors for the inbound queue of the networking layer.
   359  type NetworkInboundQueueMetrics interface {
   360  
   361  	// MessageAdded increments the metric tracking the number of messages in the queue with the given priority
   362  	MessageAdded(priority int)
   363  
   364  	// MessageRemoved decrements the metric tracking the number of messages in the queue with the given priority
   365  	MessageRemoved(priority int)
   366  
   367  	// QueueDuration tracks the time spent by a message with the given priority in the queue
   368  	QueueDuration(duration time.Duration, priority int)
   369  }
   370  
   371  // NetworkCoreMetrics encapsulates the metrics collectors for the core networking layer functionality.
   372  type NetworkCoreMetrics interface {
   373  	NetworkInboundQueueMetrics
   374  	AlspMetrics
   375  	NetworkSecurityMetrics
   376  
   377  	// OutboundMessageSent collects metrics related to a message sent by the node.
   378  	OutboundMessageSent(sizeBytes int, topic string, protocol string, messageType string)
   379  	// InboundMessageReceived collects metrics related to a message received by the node.
   380  	InboundMessageReceived(sizeBytes int, topic string, protocol string, messageType string)
   381  	// DuplicateInboundMessagesDropped increments the metric tracking the number of duplicate messages dropped by the node.
   382  	DuplicateInboundMessagesDropped(topic string, protocol string, messageType string)
   383  	// UnicastMessageSendingStarted increments the metric tracking the number of unicast messages sent by the node.
   384  	UnicastMessageSendingStarted(topic string)
   385  	// UnicastMessageSendingCompleted decrements the metric tracking the number of unicast messages sent by the node.
   386  	UnicastMessageSendingCompleted(topic string)
   387  	// MessageProcessingStarted increments the metric tracking the number of messages being processed by the node.
   388  	MessageProcessingStarted(topic string)
   389  	// MessageProcessingFinished tracks the time spent by the node to process a message and decrements the metric tracking
   390  	// the number of messages being processed by the node.
   391  	MessageProcessingFinished(topic string, duration time.Duration)
   392  }
   393  
   394  // LibP2PConnectionMetrics encapsulates the metrics collectors for the connection manager of the libp2p node.
   395  type LibP2PConnectionMetrics interface {
   396  	// OutboundConnections updates the metric tracking the number of outbound connections of this node
   397  	OutboundConnections(connectionCount uint)
   398  
   399  	// InboundConnections updates the metric tracking the number of inbound connections of this node
   400  	InboundConnections(connectionCount uint)
   401  }
   402  
   403  // AlspMetrics encapsulates the metrics collectors for the Application Layer Spam Prevention (ALSP) module, which
   404  // is part of the networking layer. ALSP is responsible to prevent spam attacks on the application layer messages that
   405  // appear to be valid for the networking layer but carry on a malicious intent on the application layer (i.e., Flow protocols).
   406  type AlspMetrics interface {
   407  	// OnMisbehaviorReported is called when a misbehavior is reported by the application layer to ALSP.
   408  	// An engine detecting a spamming-related misbehavior reports it to the ALSP module.
   409  	// Args:
   410  	// - channel: the channel on which the misbehavior was reported
   411  	// - misbehaviorType: the type of misbehavior reported
   412  	OnMisbehaviorReported(channel string, misbehaviorType string)
   413  }
   414  
   415  // NetworkMetrics is the blanket abstraction that encapsulates the metrics collectors for the networking layer.
   416  type NetworkMetrics interface {
   417  	LibP2PMetrics
   418  	NetworkCoreMetrics
   419  }
   420  
   421  // EngineMetrics is a generic metrics consumer for node-internal data processing
   422  // components (aka engines). Implementations must be non-blocking and concurrency safe.
   423  type EngineMetrics interface {
   424  	// MessageSent reports that the engine transmitted the message over the network.
   425  	// Unicasts, broadcasts, and multicasts are all reported once.
   426  	MessageSent(engine string, message string)
   427  	// MessageReceived reports that the engine received the message over the network.
   428  	MessageReceived(engine string, message string)
   429  	// MessageHandled reports that the engine has finished processing the message.
   430  	// Both invalid and valid messages should be reported.
   431  	// A message must be reported as either handled or dropped, not both.
   432  	MessageHandled(engine string, messages string)
   433  	// InboundMessageDropped reports that the engine has dropped inbound message without processing it.
   434  	// Inbound messages must be reported as either handled or dropped, not both.
   435  	InboundMessageDropped(engine string, messages string)
   436  	// OutboundMessageDropped reports that the engine has dropped outbound message without processing it.
   437  	// Outbound messages must be reported as either sent or dropped, not both.
   438  	OutboundMessageDropped(engine string, messages string)
   439  }
   440  
   441  type ComplianceMetrics interface {
   442  	FinalizedHeight(height uint64)
   443  	CommittedEpochFinalView(view uint64)
   444  	EpochTransitionHeight(height uint64)
   445  	SealedHeight(height uint64)
   446  	BlockFinalized(*flow.Block)
   447  	BlockSealed(*flow.Block)
   448  	CurrentEpochCounter(counter uint64)
   449  	CurrentEpochPhase(phase flow.EpochPhase)
   450  	CurrentEpochFinalView(view uint64)
   451  	CurrentDKGPhase1FinalView(view uint64)
   452  	CurrentDKGPhase2FinalView(view uint64)
   453  	CurrentDKGPhase3FinalView(view uint64)
   454  	EpochEmergencyFallbackTriggered()
   455  }
   456  
   457  type CleanerMetrics interface {
   458  	RanGC(took time.Duration)
   459  }
   460  
   461  type CacheMetrics interface {
   462  	// CacheEntries report the total number of cached items
   463  	CacheEntries(resource string, entries uint)
   464  	// CacheHit report the number of times the queried item is found in the cache
   465  	CacheHit(resource string)
   466  	// CacheNotFound records the number of times the queried item was not found in either cache or database.
   467  	CacheNotFound(resource string)
   468  	// CacheMiss report the number of times the queried item is not found in the cache, but found in the database.
   469  	CacheMiss(resource string)
   470  }
   471  
   472  type MempoolMetrics interface {
   473  	MempoolEntries(resource string, entries uint)
   474  	Register(resource string, entriesFunc EntriesFunc) error
   475  }
   476  
   477  type HotstuffMetrics interface {
   478  	// HotStuffBusyDuration reports Metrics C6 HotStuff Busy Duration
   479  	HotStuffBusyDuration(duration time.Duration, event string)
   480  
   481  	// HotStuffIdleDuration reports Metrics C6 HotStuff Idle Duration
   482  	HotStuffIdleDuration(duration time.Duration)
   483  
   484  	// HotStuffWaitDuration reports Metrics C6 HotStuff Idle Duration - the time between receiving and
   485  	// enqueueing a message to beginning to process that message.
   486  	HotStuffWaitDuration(duration time.Duration, event string)
   487  
   488  	// SetCurView reports Metrics C8: Current View maintained by Pacemaker.
   489  	SetCurView(view uint64)
   490  
   491  	// SetQCView reports Metrics C9: View of the newest QC known to Pacemaker.
   492  	SetQCView(view uint64)
   493  
   494  	// SetTCView reports last TC known to Pacemaker.
   495  	SetTCView(view uint64)
   496  
   497  	// CountSkipped counts the number of skips we did.
   498  	CountSkipped()
   499  
   500  	// CountTimeout tracks the number of views that this replica left due to observing a TC.
   501  	CountTimeout()
   502  
   503  	// SetTimeout sets the current timeout duration
   504  	SetTimeout(duration time.Duration)
   505  
   506  	// BlockProcessingDuration measures the time which the compliance engine
   507  	// spends to process one block proposal.
   508  	BlockProcessingDuration(duration time.Duration)
   509  
   510  	// VoteProcessingDuration measures the time which the hotstuff.VoteAggregator
   511  	// spends to process one vote.
   512  	VoteProcessingDuration(duration time.Duration)
   513  
   514  	// TimeoutObjectProcessingDuration measures the time which the hotstuff.TimeoutAggregator
   515  	// spends to process one timeout object.
   516  	TimeoutObjectProcessingDuration(duration time.Duration)
   517  
   518  	// CommitteeProcessingDuration measures the time which the HotStuff's core logic
   519  	// spends in the hotstuff.Replicas component, i.e. the time determining consensus
   520  	// committee relations.
   521  	CommitteeProcessingDuration(duration time.Duration)
   522  
   523  	// SignerProcessingDuration measures the time which the HotStuff's core logic
   524  	// spends in the hotstuff.Signer component, i.e. the with crypto-related operations.
   525  	SignerProcessingDuration(duration time.Duration)
   526  
   527  	// ValidatorProcessingDuration measures the time which the HotStuff's core logic
   528  	// spends in the hotstuff.Validator component, i.e. the with verifying
   529  	// consensus messages.
   530  	ValidatorProcessingDuration(duration time.Duration)
   531  
   532  	// PayloadProductionDuration measures the time which the HotStuff's core logic
   533  	// spends in the module.Builder component, i.e. the with generating block payloads.
   534  	PayloadProductionDuration(duration time.Duration)
   535  
   536  	// TimeoutCollectorsRange collects information from the node's `TimeoutAggregator` component.
   537  	// Specifically, it measurers the number of views for which we are currently collecting timeouts
   538  	// (i.e. the number of `TimeoutCollector` instances we are maintaining) and their lowest/highest view.
   539  	TimeoutCollectorsRange(lowestRetainedView uint64, newestViewCreatedCollector uint64, activeCollectors int)
   540  }
   541  
   542  type CruiseCtlMetrics interface {
   543  
   544  	// PIDError measures the current error values for the proportional, integration,
   545  	// and derivative terms of the PID controller.
   546  	PIDError(p, i, d float64)
   547  
   548  	// TargetProposalDuration measures the current value of the Block Time Controller output:
   549  	// the target duration from parent to child proposal.
   550  	TargetProposalDuration(duration time.Duration)
   551  
   552  	// ControllerOutput measures the output of the cruise control PID controller.
   553  	// Concretely, this is the quantity to subtract from the baseline view duration.
   554  	ControllerOutput(duration time.Duration)
   555  }
   556  
   557  type CollectionMetrics interface {
   558  	// TransactionIngested is called when a new transaction is ingested by the
   559  	// node. It increments the total count of ingested transactions and starts
   560  	// a tx->col span for the transaction.
   561  	TransactionIngested(txID flow.Identifier)
   562  
   563  	// ClusterBlockProposed is called when a new collection is proposed by us or
   564  	// any other node in the cluster.
   565  	ClusterBlockProposed(block *cluster.Block)
   566  
   567  	// ClusterBlockFinalized is called when a collection is finalized.
   568  	ClusterBlockFinalized(block *cluster.Block)
   569  }
   570  
   571  type ConsensusMetrics interface {
   572  	// StartCollectionToFinalized reports Metrics C1: Collection Received by CCL→ Collection Included in Finalized Block
   573  	StartCollectionToFinalized(collectionID flow.Identifier)
   574  
   575  	// FinishCollectionToFinalized reports Metrics C1: Collection Received by CCL→ Collection Included in Finalized Block
   576  	FinishCollectionToFinalized(collectionID flow.Identifier)
   577  
   578  	// StartBlockToSeal reports Metrics C4: Block Received by CCL → Block Seal in finalized block
   579  	StartBlockToSeal(blockID flow.Identifier)
   580  
   581  	// FinishBlockToSeal reports Metrics C4: Block Received by CCL → Block Seal in finalized block
   582  	FinishBlockToSeal(blockID flow.Identifier)
   583  
   584  	// EmergencySeal increments the number of seals that were created in emergency mode
   585  	EmergencySeal()
   586  
   587  	// OnReceiptProcessingDuration records the number of seconds spent processing a receipt
   588  	OnReceiptProcessingDuration(duration time.Duration)
   589  
   590  	// OnApprovalProcessingDuration records the number of seconds spent processing an approval
   591  	OnApprovalProcessingDuration(duration time.Duration)
   592  
   593  	// CheckSealingDuration records absolute time for the full sealing check by the consensus match engine
   594  	CheckSealingDuration(duration time.Duration)
   595  }
   596  
   597  type VerificationMetrics interface {
   598  	// OnBlockConsumerJobDone is invoked by block consumer whenever it is notified a job is done by a worker. It
   599  	// sets the last processed block job index.
   600  	OnBlockConsumerJobDone(uint64)
   601  	// OnChunkConsumerJobDone is invoked by chunk consumer whenever it is notified a job is done by a worker. It
   602  	// sets the last processed chunk job index.
   603  	OnChunkConsumerJobDone(uint64)
   604  	// OnExecutionResultReceivedAtAssignerEngine is called whenever a new execution result arrives
   605  	// at Assigner engine. It increments total number of received execution results.
   606  	OnExecutionResultReceivedAtAssignerEngine()
   607  
   608  	// OnVerifiableChunkReceivedAtVerifierEngine increments a counter that keeps track of number of verifiable chunks received at
   609  	// verifier engine from fetcher engine.
   610  	OnVerifiableChunkReceivedAtVerifierEngine()
   611  
   612  	// OnFinalizedBlockArrivedAtAssigner sets a gauge that keeps track of number of the latest block height arrives
   613  	// at assigner engine. Note that it assumes blocks are coming to assigner engine in strictly increasing order of their height.
   614  	OnFinalizedBlockArrivedAtAssigner(height uint64)
   615  
   616  	// OnChunksAssignmentDoneAtAssigner increments a counter that keeps track of the total number of assigned chunks to
   617  	// the verification node.
   618  	OnChunksAssignmentDoneAtAssigner(chunks int)
   619  
   620  	// OnAssignedChunkProcessedAtAssigner increments a counter that keeps track of the total number of assigned chunks pushed by
   621  	// assigner engine to the fetcher engine.
   622  	OnAssignedChunkProcessedAtAssigner()
   623  
   624  	// OnAssignedChunkReceivedAtFetcher increments a counter that keeps track of number of assigned chunks arrive at fetcher engine.
   625  	OnAssignedChunkReceivedAtFetcher()
   626  
   627  	// OnChunkDataPackRequestSentByFetcher increments a counter that keeps track of number of chunk data pack requests that fetcher engine
   628  	// sends to requester engine.
   629  	OnChunkDataPackRequestSentByFetcher()
   630  
   631  	// OnChunkDataPackRequestReceivedByRequester increments a counter that keeps track of number of chunk data pack requests
   632  	// arrive at the requester engine from the fetcher engine.
   633  	OnChunkDataPackRequestReceivedByRequester()
   634  
   635  	// OnChunkDataPackRequestDispatchedInNetwork increments a counter that keeps track of number of chunk data pack requests that the
   636  	// requester engine dispatches in the network (to the execution nodes).
   637  	OnChunkDataPackRequestDispatchedInNetworkByRequester()
   638  
   639  	// OnChunkDataPackResponseReceivedFromNetwork increments a counter that keeps track of number of chunk data pack responses that the
   640  	// requester engine receives from execution nodes (through network).
   641  	OnChunkDataPackResponseReceivedFromNetworkByRequester()
   642  
   643  	// SetMaxChunkDataPackAttemptsForNextUnsealedHeightAtRequester is invoked when a cycle of requesting chunk data packs is done by requester engine.
   644  	// It updates the maximum number of attempts made by requester engine for requesting the chunk data packs of the next unsealed height.
   645  	// The maximum is taken over the history of all chunk data packs requested during that cycle that belong to the next unsealed height.
   646  	SetMaxChunkDataPackAttemptsForNextUnsealedHeightAtRequester(attempts uint64)
   647  
   648  	// OnChunkDataPackSentToFetcher increments a counter that keeps track of number of chunk data packs sent to the fetcher engine from
   649  	// requester engine.
   650  	OnChunkDataPackSentToFetcher()
   651  
   652  	// OnChunkDataPackArrivedAtFetcher increments a counter that keeps track of number of chunk data packs arrived at fetcher engine from
   653  	// requester engine.
   654  	OnChunkDataPackArrivedAtFetcher()
   655  
   656  	// OnVerifiableChunkSentToVerifier increments a counter that keeps track of number of verifiable chunks fetcher engine sent to verifier engine.
   657  	OnVerifiableChunkSentToVerifier()
   658  
   659  	// OnResultApprovalDispatchedInNetwork increments a counter that keeps track of number of result approvals dispatched in the network
   660  	// by verifier engine.
   661  	OnResultApprovalDispatchedInNetworkByVerifier()
   662  }
   663  
   664  // LedgerMetrics provides an interface to record Ledger Storage metrics.
   665  // Ledger storage is non-linear (fork-aware) so certain metrics are averaged
   666  // and computed before emitting for better visibility
   667  type LedgerMetrics interface {
   668  	// ForestApproxMemorySize records approximate memory usage of forest (all in-memory trees)
   669  	ForestApproxMemorySize(bytes uint64)
   670  
   671  	// ForestNumberOfTrees current number of trees in a forest (in memory)
   672  	ForestNumberOfTrees(number uint64)
   673  
   674  	// LatestTrieRegCount records the number of unique register allocated (the latest created trie)
   675  	LatestTrieRegCount(number uint64)
   676  
   677  	// LatestTrieRegCountDiff records the difference between the number of unique register allocated of the latest created trie and parent trie
   678  	LatestTrieRegCountDiff(number int64)
   679  
   680  	// LatestTrieRegSize records the size of unique register allocated (the latest created trie)
   681  	LatestTrieRegSize(size uint64)
   682  
   683  	// LatestTrieRegSizeDiff records the difference between the size of unique register allocated of the latest created trie and parent trie
   684  	LatestTrieRegSizeDiff(size int64)
   685  
   686  	// LatestTrieMaxDepthTouched records the maximum depth touched of the lastest created trie
   687  	LatestTrieMaxDepthTouched(maxDepth uint16)
   688  
   689  	// UpdateCount increase a counter of performed updates
   690  	UpdateCount()
   691  
   692  	// ProofSize records a proof size
   693  	ProofSize(bytes uint32)
   694  
   695  	// UpdateValuesNumber accumulates number of updated values
   696  	UpdateValuesNumber(number uint64)
   697  
   698  	// UpdateValuesSize total size (in bytes) of updates values
   699  	UpdateValuesSize(byte uint64)
   700  
   701  	// UpdateDuration records absolute time for the update of a trie
   702  	UpdateDuration(duration time.Duration)
   703  
   704  	// UpdateDurationPerItem records update time for single value (total duration / number of updated values)
   705  	UpdateDurationPerItem(duration time.Duration)
   706  
   707  	// ReadValuesNumber accumulates number of read values
   708  	ReadValuesNumber(number uint64)
   709  
   710  	// ReadValuesSize total size (in bytes) of read values
   711  	ReadValuesSize(byte uint64)
   712  
   713  	// ReadDuration records absolute time for the read from a trie
   714  	ReadDuration(duration time.Duration)
   715  
   716  	// ReadDurationPerItem records read time for single value (total duration / number of read values)
   717  	ReadDurationPerItem(duration time.Duration)
   718  }
   719  
   720  type WALMetrics interface {
   721  	// ExecutionCheckpointSize reports the size of a checkpoint in bytes
   722  	ExecutionCheckpointSize(bytes uint64)
   723  }
   724  
   725  type RateLimitedBlockstoreMetrics interface {
   726  	BytesRead(int)
   727  }
   728  
   729  type BitswapMetrics interface {
   730  	Peers(prefix string, n int)
   731  	Wantlist(prefix string, n int)
   732  	BlobsReceived(prefix string, n uint64)
   733  	DataReceived(prefix string, n uint64)
   734  	BlobsSent(prefix string, n uint64)
   735  	DataSent(prefix string, n uint64)
   736  	DupBlobsReceived(prefix string, n uint64)
   737  	DupDataReceived(prefix string, n uint64)
   738  	MessagesReceived(prefix string, n uint64)
   739  }
   740  
   741  type ExecutionDataRequesterMetrics interface {
   742  	// ExecutionDataFetchStarted records an in-progress download
   743  	ExecutionDataFetchStarted()
   744  
   745  	// ExecutionDataFetchFinished records a completed download
   746  	ExecutionDataFetchFinished(duration time.Duration, success bool, height uint64)
   747  
   748  	// NotificationSent reports that ExecutionData received notifications were sent for a block height
   749  	NotificationSent(height uint64)
   750  
   751  	// FetchRetried reports that a download retry was processed
   752  	FetchRetried()
   753  }
   754  
   755  type ExecutionStateIndexerMetrics interface {
   756  	// BlockIndexed records metrics from indexing execution data from a single block.
   757  	BlockIndexed(height uint64, duration time.Duration, events, registers, transactionResults int)
   758  
   759  	// BlockReindexed records that a previously indexed block was indexed again.
   760  	BlockReindexed()
   761  
   762  	// InitializeLatestHeight records the latest height that has been indexed.
   763  	// This should only be used during startup. After startup, use BlockIndexed to record newly
   764  	// indexed heights.
   765  	InitializeLatestHeight(height uint64)
   766  }
   767  
   768  type RuntimeMetrics interface {
   769  	// RuntimeTransactionParsed reports the time spent parsing a single transaction
   770  	RuntimeTransactionParsed(dur time.Duration)
   771  
   772  	// RuntimeTransactionChecked reports the time spent checking a single transaction
   773  	RuntimeTransactionChecked(dur time.Duration)
   774  
   775  	// RuntimeTransactionInterpreted reports the time spent interpreting a single transaction
   776  	RuntimeTransactionInterpreted(dur time.Duration)
   777  
   778  	// RuntimeSetNumberOfAccounts Sets the total number of accounts on the network
   779  	RuntimeSetNumberOfAccounts(count uint64)
   780  
   781  	// RuntimeTransactionProgramsCacheMiss reports a programs cache miss
   782  	// during transaction execution
   783  	RuntimeTransactionProgramsCacheMiss()
   784  
   785  	// RuntimeTransactionProgramsCacheHit reports a programs cache hit
   786  	// during transaction execution
   787  	RuntimeTransactionProgramsCacheHit()
   788  }
   789  
   790  type ProviderMetrics interface {
   791  	// ChunkDataPackRequestProcessed is executed every time a chunk data pack request is picked up for processing at execution node.
   792  	// It increases the request processed counter by one.
   793  	ChunkDataPackRequestProcessed()
   794  }
   795  
   796  type ExecutionDataProviderMetrics interface {
   797  	RootIDComputed(duration time.Duration, numberOfChunks int)
   798  	AddBlobsSucceeded(duration time.Duration, totalSize uint64)
   799  	AddBlobsFailed()
   800  }
   801  
   802  type ExecutionDataRequesterV2Metrics interface {
   803  	FulfilledHeight(blockHeight uint64)
   804  	ReceiptSkipped()
   805  	RequestSucceeded(blockHeight uint64, duration time.Duration, totalSize uint64, numberOfAttempts int)
   806  	RequestFailed(duration time.Duration, retryable bool)
   807  	RequestCanceled()
   808  	ResponseDropped()
   809  }
   810  
   811  type ExecutionDataPrunerMetrics interface {
   812  	Pruned(height uint64, duration time.Duration)
   813  }
   814  
   815  type RestMetrics interface {
   816  	// Example recorder taken from:
   817  	// https://github.com/slok/go-http-metrics/blob/master/metrics/prometheus/prometheus.go
   818  	httpmetrics.Recorder
   819  	AddTotalRequests(ctx context.Context, method string, routeName string)
   820  }
   821  
   822  type GRPCConnectionPoolMetrics interface {
   823  	// TotalConnectionsInPool updates the number connections to collection/execution nodes stored in the pool, and the size of the pool
   824  	TotalConnectionsInPool(connectionCount uint, connectionPoolSize uint)
   825  
   826  	// ConnectionFromPoolReused tracks the number of times a connection to a collection/execution node is reused from the connection pool
   827  	ConnectionFromPoolReused()
   828  
   829  	// ConnectionAddedToPool tracks the number of times a collection/execution node is added to the connection pool
   830  	ConnectionAddedToPool()
   831  
   832  	// NewConnectionEstablished tracks the number of times a new grpc connection is established
   833  	NewConnectionEstablished()
   834  
   835  	// ConnectionFromPoolInvalidated tracks the number of times a cached grpc connection is invalidated and closed
   836  	ConnectionFromPoolInvalidated()
   837  
   838  	// ConnectionFromPoolUpdated tracks the number of times a cached connection is updated
   839  	ConnectionFromPoolUpdated()
   840  
   841  	// ConnectionFromPoolEvicted tracks the number of times a cached connection is evicted from the cache
   842  	ConnectionFromPoolEvicted()
   843  }
   844  
   845  type AccessMetrics interface {
   846  	RestMetrics
   847  	GRPCConnectionPoolMetrics
   848  	TransactionMetrics
   849  	BackendScriptsMetrics
   850  
   851  	// UpdateExecutionReceiptMaxHeight is called whenever we store an execution receipt from a block from a newer height
   852  	UpdateExecutionReceiptMaxHeight(height uint64)
   853  
   854  	// UpdateLastFullBlockHeight tracks the height of the last block for which all collections were received
   855  	UpdateLastFullBlockHeight(height uint64)
   856  }
   857  
   858  type ExecutionResultStats struct {
   859  	ComputationUsed                 uint64
   860  	MemoryUsed                      uint64
   861  	EventCounts                     int
   862  	EventSize                       int
   863  	NumberOfRegistersTouched        int
   864  	NumberOfBytesWrittenToRegisters int
   865  	NumberOfCollections             int
   866  	NumberOfTransactions            int
   867  }
   868  
   869  func (stats *ExecutionResultStats) Merge(other ExecutionResultStats) {
   870  	stats.ComputationUsed += other.ComputationUsed
   871  	stats.MemoryUsed += other.MemoryUsed
   872  	stats.EventCounts += other.EventCounts
   873  	stats.EventSize += other.EventSize
   874  	stats.NumberOfRegistersTouched += other.NumberOfRegistersTouched
   875  	stats.NumberOfBytesWrittenToRegisters += other.NumberOfBytesWrittenToRegisters
   876  	stats.NumberOfCollections += other.NumberOfCollections
   877  	stats.NumberOfTransactions += other.NumberOfTransactions
   878  }
   879  
   880  type ExecutionMetrics interface {
   881  	LedgerMetrics
   882  	RuntimeMetrics
   883  	ProviderMetrics
   884  	WALMetrics
   885  
   886  	// StartBlockReceivedToExecuted starts a span to trace the duration of a block
   887  	// from being received for execution to execution being finished
   888  	StartBlockReceivedToExecuted(blockID flow.Identifier)
   889  
   890  	// FinishBlockReceivedToExecuted finishes a span to trace the duration of a block
   891  	// from being received for execution to execution being finished
   892  	FinishBlockReceivedToExecuted(blockID flow.Identifier)
   893  
   894  	// ExecutionStorageStateCommitment reports the storage size of a state commitment in bytes
   895  	ExecutionStorageStateCommitment(bytes int64)
   896  
   897  	// ExecutionLastExecutedBlockHeight reports last executed block height
   898  	ExecutionLastExecutedBlockHeight(height uint64)
   899  
   900  	// ExecutionLastFinalizedExecutedBlockHeight reports last finalized and executed block height
   901  	ExecutionLastFinalizedExecutedBlockHeight(height uint64)
   902  
   903  	// ExecutionBlockExecuted reports the total time and computation spent on executing a block
   904  	ExecutionBlockExecuted(dur time.Duration, stats ExecutionResultStats)
   905  
   906  	// ExecutionBlockExecutionEffortVectorComponent reports the unweighted effort of given ComputationKind at block level
   907  	ExecutionBlockExecutionEffortVectorComponent(string, uint)
   908  
   909  	// ExecutionBlockCachedPrograms reports the number of cached programs at the end of a block
   910  	ExecutionBlockCachedPrograms(programs int)
   911  
   912  	// ExecutionCollectionExecuted reports the total time and computation spent on executing a collection
   913  	ExecutionCollectionExecuted(dur time.Duration, stats ExecutionResultStats)
   914  
   915  	// ExecutionTransactionExecuted reports stats on executing a single transaction
   916  	ExecutionTransactionExecuted(
   917  		dur time.Duration,
   918  		numTxnConflictRetries int,
   919  		compUsed uint64,
   920  		memoryUsed uint64,
   921  		eventCounts int,
   922  		eventSize int,
   923  		failed bool)
   924  
   925  	// ExecutionChunkDataPackGenerated reports stats on chunk data pack generation
   926  	ExecutionChunkDataPackGenerated(proofSize, numberOfTransactions int)
   927  
   928  	// ExecutionScriptExecuted reports the time and memory spent on executing an script
   929  	ExecutionScriptExecuted(dur time.Duration, compUsed, memoryUsed, memoryEstimate uint64)
   930  
   931  	// ExecutionCollectionRequestSent reports when a request for a collection is sent to a collection node
   932  	ExecutionCollectionRequestSent()
   933  
   934  	// Unused
   935  	ExecutionCollectionRequestRetried()
   936  
   937  	// ExecutionSync reports when the state syncing is triggered or stopped.
   938  	ExecutionSync(syncing bool)
   939  
   940  	// Upload metrics
   941  	ExecutionBlockDataUploadStarted()
   942  	ExecutionBlockDataUploadFinished(dur time.Duration)
   943  	ExecutionComputationResultUploaded()
   944  	ExecutionComputationResultUploadRetried()
   945  
   946  	UpdateCollectionMaxHeight(height uint64)
   947  }
   948  
   949  type BackendScriptsMetrics interface {
   950  	// ScriptExecuted records the round trip time while executing a script
   951  	ScriptExecuted(dur time.Duration, size int)
   952  
   953  	// ScriptExecutionErrorLocal records script execution failures from local execution
   954  	ScriptExecutionErrorLocal()
   955  
   956  	// ScriptExecutionErrorOnExecutionNode records script execution failures on Execution Nodes
   957  	ScriptExecutionErrorOnExecutionNode()
   958  
   959  	// ScriptExecutionResultMismatch records script execution result mismatches between local and
   960  	// execution nodes
   961  	ScriptExecutionResultMismatch()
   962  
   963  	// ScriptExecutionResultMatch records script execution result matches between local and
   964  	// execution nodes
   965  	ScriptExecutionResultMatch()
   966  
   967  	// ScriptExecutionErrorMismatch records script execution error mismatches between local and
   968  	// execution nodes
   969  	ScriptExecutionErrorMismatch()
   970  
   971  	// ScriptExecutionErrorMatch records script execution error matches between local and
   972  	// execution nodes
   973  	ScriptExecutionErrorMatch()
   974  
   975  	// ScriptExecutionNotIndexed records script execution matches where data for the block is not
   976  	// indexed locally yet
   977  	ScriptExecutionNotIndexed()
   978  }
   979  
   980  type TransactionMetrics interface {
   981  	// Record the round trip time while getting a transaction result
   982  	TransactionResultFetched(dur time.Duration, size int)
   983  
   984  	// TransactionReceived starts tracking of transaction execution/finalization/sealing
   985  	TransactionReceived(txID flow.Identifier, when time.Time)
   986  
   987  	// TransactionFinalized reports the time spent between the transaction being received and finalized. Reporting only
   988  	// works if the transaction was earlier added as received.
   989  	TransactionFinalized(txID flow.Identifier, when time.Time)
   990  
   991  	// TransactionExecuted reports the time spent between the transaction being received and executed. Reporting only
   992  	// works if the transaction was earlier added as received.
   993  	TransactionExecuted(txID flow.Identifier, when time.Time)
   994  
   995  	// TransactionExpired tracks number of expired transactions
   996  	TransactionExpired(txID flow.Identifier)
   997  
   998  	// TransactionSubmissionFailed should be called whenever we try to submit a transaction and it fails
   999  	TransactionSubmissionFailed()
  1000  }
  1001  
  1002  type PingMetrics interface {
  1003  	// NodeReachable tracks the round trip time in milliseconds taken to ping a node
  1004  	// The nodeInfo provides additional information about the node such as the name of the node operator
  1005  	NodeReachable(node *flow.Identity, nodeInfo string, rtt time.Duration)
  1006  
  1007  	// NodeInfo tracks the software version, sealed height and hotstuff view of a node
  1008  	NodeInfo(node *flow.Identity, nodeInfo string, version string, sealedHeight uint64, hotstuffCurView uint64)
  1009  }
  1010  
  1011  type HeroCacheMetrics interface {
  1012  	// BucketAvailableSlots keeps track of number of available slots in buckets of cache.
  1013  	BucketAvailableSlots(uint64, uint64)
  1014  
  1015  	// OnKeyPutAttempt is called whenever a new (key, value) pair is attempted to be put in cache.
  1016  	// It does not reflect whether the put was successful or not.
  1017  	// A (key, value) pair put attempt may fail if the cache is full, or the key already exists.
  1018  	OnKeyPutAttempt(size uint32)
  1019  
  1020  	// OnKeyPutSuccess is called whenever a new (key, entity) pair is successfully added to the cache.
  1021  	OnKeyPutSuccess(size uint32)
  1022  
  1023  	// OnKeyPutDrop is called whenever a new (key, entity) pair is dropped from the cache due to full cache.
  1024  	OnKeyPutDrop()
  1025  
  1026  	// OnKeyPutDeduplicated is tracking the total number of unsuccessful writes caused by adding a duplicate key to the cache.
  1027  	// A duplicate key is dropped by the cache when it is written to the cache.
  1028  	// Note: in context of HeroCache, the key corresponds to the identifier of its entity. Hence, a duplicate key corresponds to
  1029  	// a duplicate entity.
  1030  	OnKeyPutDeduplicated()
  1031  
  1032  	// OnKeyRemoved is called whenever a (key, entity) pair is removed from the cache.
  1033  	OnKeyRemoved(size uint32)
  1034  
  1035  	// OnKeyGetSuccess tracks total number of successful read queries.
  1036  	// A read query is successful if the entity corresponding to its key is available in the cache.
  1037  	// Note: in context of HeroCache, the key corresponds to the identifier of its entity.
  1038  	OnKeyGetSuccess()
  1039  
  1040  	// OnKeyGetFailure tracks total number of unsuccessful read queries.
  1041  	// A read query is unsuccessful if the entity corresponding to its key is not available in the cache.
  1042  	// Note: in context of HeroCache, the key corresponds to the identifier of its entity.
  1043  	OnKeyGetFailure()
  1044  
  1045  	// OnEntityEjectionDueToFullCapacity is called whenever adding a new (key, entity) to the cache results in ejection of another (key', entity') pair.
  1046  	// This normally happens -- and is expected -- when the cache is full.
  1047  	// Note: in context of HeroCache, the key corresponds to the identifier of its entity.
  1048  	OnEntityEjectionDueToFullCapacity()
  1049  
  1050  	// OnEntityEjectionDueToEmergency is called whenever a bucket is found full and all of its keys are valid, i.e.,
  1051  	// each key belongs to an existing (key, entity) pair.
  1052  	// Hence, adding a new key to that bucket will replace the oldest valid key inside that bucket.
  1053  	// Note: in context of HeroCache, the key corresponds to the identifier of its entity.
  1054  	OnEntityEjectionDueToEmergency()
  1055  }
  1056  
  1057  type ChainSyncMetrics interface {
  1058  	// record pruned blocks. requested and received times might be zero values
  1059  	PrunedBlockById(status *chainsync.Status)
  1060  
  1061  	PrunedBlockByHeight(status *chainsync.Status)
  1062  
  1063  	// totalByHeight and totalById are the number of blocks pruned for blocks requested by height and by id
  1064  	// storedByHeight and storedById are the number of blocks still stored by height and id
  1065  	PrunedBlocks(totalByHeight, totalById, storedByHeight, storedById int)
  1066  
  1067  	RangeRequested(ran chainsync.Range)
  1068  
  1069  	BatchRequested(batch chainsync.Batch)
  1070  }
  1071  
  1072  type DHTMetrics interface {
  1073  	RoutingTablePeerAdded()
  1074  	RoutingTablePeerRemoved()
  1075  }
  1076  
  1077  type CollectionExecutedMetric interface {
  1078  	CollectionFinalized(light flow.LightCollection)
  1079  	CollectionExecuted(light flow.LightCollection)
  1080  	BlockFinalized(block *flow.Block)
  1081  	ExecutionReceiptReceived(r *flow.ExecutionReceipt)
  1082  	UpdateLastFullBlockHeight(height uint64)
  1083  }