github.com/onflow/flow-go@v0.33.17/module/metrics.go (about) 1 package module 2 3 import ( 4 "context" 5 "time" 6 7 "github.com/libp2p/go-libp2p/core/peer" 8 rcmgr "github.com/libp2p/go-libp2p/p2p/host/resource-manager" 9 httpmetrics "github.com/slok/go-http-metrics/metrics" 10 11 "github.com/onflow/flow-go/model/chainsync" 12 "github.com/onflow/flow-go/model/cluster" 13 "github.com/onflow/flow-go/model/flow" 14 "github.com/onflow/flow-go/network/channels" 15 p2pmsg "github.com/onflow/flow-go/network/p2p/message" 16 ) 17 18 type EntriesFunc func() uint 19 20 // ResolverMetrics encapsulates the metrics collectors for dns resolver module of the networking layer. 21 type ResolverMetrics interface { 22 // DNSLookupDuration tracks the time spent to resolve a DNS address. 23 DNSLookupDuration(duration time.Duration) 24 25 // OnDNSCacheMiss tracks the total number of dns requests resolved through looking up the network. 26 OnDNSCacheMiss() 27 28 // OnDNSCacheHit tracks the total number of dns requests resolved through the cache without 29 // looking up the network. 30 OnDNSCacheHit() 31 32 // OnDNSCacheInvalidated is called whenever dns cache is invalidated for an entry 33 OnDNSCacheInvalidated() 34 35 // OnDNSLookupRequestDropped tracks the number of dns lookup requests that are dropped due to a full queue 36 OnDNSLookupRequestDropped() 37 } 38 39 // NetworkSecurityMetrics metrics related to network protection. 40 type NetworkSecurityMetrics interface { 41 // OnUnauthorizedMessage tracks the number of unauthorized messages seen on the network. 42 OnUnauthorizedMessage(role, msgType, topic, offense string) 43 44 // OnRateLimitedPeer tracks the number of rate limited unicast messages seen on the network. 45 OnRateLimitedPeer(pid peer.ID, role, msgType, topic, reason string) 46 47 // OnViolationReportSkipped tracks the number of slashing violations consumer violations that were not 48 // reported for misbehavior when the identity of the sender not known. 49 OnViolationReportSkipped() 50 } 51 52 // GossipSubRpcInspectorMetrics encapsulates the metrics collectors for GossipSub RPC Inspector module of the networking layer. 53 // The RPC inspector is the entry point of the GossipSub protocol. It inspects the incoming RPC messages and decides 54 // whether to accept, prune, or reject the RPC message. 55 // The GossipSubRpcInspectorMetrics tracks the number of RPC messages received by the local node from other nodes over 56 // the GossipSub protocol. It also tracks the number of control messages included in the RPC messages, i.e., IHAVE, IWANT, 57 // GRAFT, PRUNE. It also tracks the number of actual messages included in the RPC messages. 58 // The GossipSubRpcInspectorMetrics differs from LocalGossipSubRouterMetrics in that the former tracks the messages 59 // received by the local node from other nodes over the GossipSub protocol but may not all be accepted by the local node, 60 // e.g., due to RPC pruning or throttling; while the latter tracks the local node's view of the GossipSub protocol, i.e., entirely 61 // containing the messages that are accepted by the local node (either as whole RPC or only for the control messages). 62 // Having this distinction is useful for debugging and troubleshooting the GossipSub protocol, for example, the number of 63 // messages received by the local node from other nodes over the GossipSub protocol may be much higher than the number 64 // of messages accepted by the local node, which may indicate that the local node is throttling the incoming messages. 65 type GossipSubRpcInspectorMetrics interface { 66 // OnIWantMessageIDsReceived tracks the number of message ids received by the node from other nodes on an RPC. 67 // Note: this function is called on each IWANT message received by the node, not on each message id received. 68 OnIWantMessageIDsReceived(msgIdCount int) 69 70 // OnIHaveMessageIDsReceived tracks the number of message ids received by the node from other nodes on an iHave message. 71 // This function is called on each iHave message received by the node. 72 // Args: 73 // - channel: the channel on which the iHave message was received. 74 // - msgIdCount: the number of message ids received on the iHave message. 75 OnIHaveMessageIDsReceived(channel string, msgIdCount int) 76 77 // OnIncomingRpcReceived tracks the number of RPC messages received by the node. 78 // Args: 79 // iHaveCount: the number of iHAVE messages included in the RPC. 80 // iWantCount: the number of iWANT messages included in the RPC. 81 // graftCount: the number of GRAFT messages included in the RPC. 82 // pruneCount: the number of PRUNE messages included in the RPC. 83 // msgCount: the number of publish messages included in the RPC. 84 OnIncomingRpcReceived(iHaveCount, iWantCount, graftCount, pruneCount, msgCount int) 85 } 86 87 // LocalGossipSubRouterMetrics encapsulates the metrics collectors for GossipSub router of the local node. 88 // It gives a lens into the local GossipSub node's view of the GossipSub protocol. 89 // LocalGossipSubRouterMetrics differs from GossipSubRpcInspectorMetrics in that the former tracks the local node's view 90 // of the GossipSub protocol, while the latter tracks the messages received by the local node from other nodes over the 91 // GossipSub protocol but may not all be accepted by the local node, e.g., due to RPC pruning or throttling. 92 // Having this distinction is useful for debugging and troubleshooting the GossipSub protocol, for example, the number of 93 // messages received by the local node from other nodes over the GossipSub protocol may be much higher than the number 94 // of messages accepted by the local node, which may indicate that the local node is throttling the incoming messages. 95 type LocalGossipSubRouterMetrics interface { 96 // OnLocalMeshSizeUpdated tracks the size of the local mesh for a topic. 97 OnLocalMeshSizeUpdated(topic string, size int) 98 99 // OnPeerAddedToProtocol is called when the local node receives a stream from a peer on a gossipsub-related protocol. 100 // Args: 101 // protocol: the protocol name that the peer is connected to. 102 OnPeerAddedToProtocol(protocol string) 103 104 // OnPeerRemovedFromProtocol is called when the local considers a remote peer blacklisted or unavailable. 105 OnPeerRemovedFromProtocol() 106 107 // OnLocalPeerJoinedTopic is called when the local node subscribes to a gossipsub topic. 108 OnLocalPeerJoinedTopic() 109 110 // OnLocalPeerLeftTopic is called when the local node unsubscribes from a gossipsub topic. 111 OnLocalPeerLeftTopic() 112 113 // OnPeerGraftTopic is called when the local node receives a GRAFT message from a remote peer on a topic. 114 // Note: the received GRAFT at this point is considered passed the RPC inspection, and is accepted by the local node. 115 OnPeerGraftTopic(topic string) 116 117 // OnPeerPruneTopic is called when the local node receives a PRUNE message from a remote peer on a topic. 118 // Note: the received PRUNE at this point is considered passed the RPC inspection, and is accepted by the local node. 119 OnPeerPruneTopic(topic string) 120 121 // OnMessageEnteredValidation is called when a received pubsub message enters the validation pipeline. It is the 122 // internal validation pipeline of GossipSub protocol. The message may be rejected or accepted by the validation 123 // pipeline. 124 OnMessageEnteredValidation(size int) 125 126 // OnMessageRejected is called when a received pubsub message is rejected by the validation pipeline. 127 // Args: 128 // 129 // reason: the reason for rejection. 130 // size: the size of the message in bytes. 131 OnMessageRejected(size int, reason string) 132 133 // OnMessageDuplicate is called when a received pubsub message is a duplicate of a previously received message, and 134 // is dropped. 135 // Args: 136 // size: the size of the message in bytes. 137 OnMessageDuplicate(size int) 138 139 // OnPeerThrottled is called when a peer is throttled by the local node, i.e., the local node is not accepting any 140 // pubsub message from the peer but may still accept control messages. 141 OnPeerThrottled() 142 143 // OnRpcReceived is called when an RPC message is received by the local node. The received RPC is considered 144 // passed the RPC inspection, and is accepted by the local node. 145 // Args: 146 // msgCount: the number of messages included in the RPC. 147 // iHaveCount: the number of iHAVE messages included in the RPC. 148 // iWantCount: the number of iWANT messages included in the RPC. 149 // graftCount: the number of GRAFT messages included in the RPC. 150 // pruneCount: the number of PRUNE messages included in the RPC. 151 OnRpcReceived(msgCount int, iHaveCount int, iWantCount int, graftCount int, pruneCount int) 152 153 // OnRpcSent is called when an RPC message is sent by the local node. 154 // Note: the sent RPC is considered passed the RPC inspection, and is accepted by the local node. 155 // Args: 156 // msgCount: the number of messages included in the RPC. 157 // iHaveCount: the number of iHAVE messages included in the RPC. 158 // iWantCount: the number of iWANT messages included in the RPC. 159 // graftCount: the number of GRAFT messages included in the RPC. 160 // pruneCount: the number of PRUNE messages included in the RPC. 161 OnRpcSent(msgCount int, iHaveCount int, iWantCount int, graftCount int, pruneCount int) 162 163 // OnOutboundRpcDropped is called when an outbound RPC message is dropped by the local node, typically because the local node 164 // outbound message queue is full; or the RPC is big and the local node cannot fragment it. 165 OnOutboundRpcDropped() 166 167 // OnUndeliveredMessage is called when a message is not delivered at least one subscriber of the topic, for example when 168 // the subscriber is too slow to process the message. 169 OnUndeliveredMessage() 170 171 // OnMessageDeliveredToAllSubscribers is called when a message is delivered to all subscribers of the topic. 172 OnMessageDeliveredToAllSubscribers(size int) 173 } 174 175 // UnicastManagerMetrics unicast manager metrics. 176 type UnicastManagerMetrics interface { 177 // OnStreamCreated tracks the overall time it takes to create a stream successfully and the number of retry attempts. 178 OnStreamCreated(duration time.Duration, attempts int) 179 // OnStreamCreationFailure tracks the amount of time taken and number of retry attempts used when the unicast manager fails to create a stream. 180 OnStreamCreationFailure(duration time.Duration, attempts int) 181 // OnPeerDialed tracks the time it takes to dial a peer during stream creation and the number of retry attempts before a peer 182 // is dialed successfully. 183 OnPeerDialed(duration time.Duration, attempts int) 184 // OnPeerDialFailure tracks the amount of time taken and number of retry attempts used when the unicast manager cannot dial a peer 185 // to establish the initial connection between the two. 186 OnPeerDialFailure(duration time.Duration, attempts int) 187 // OnStreamEstablished tracks the time it takes to create a stream successfully on the available open connection during stream 188 // creation and the number of retry attempts. 189 OnStreamEstablished(duration time.Duration, attempts int) 190 // OnEstablishStreamFailure tracks the amount of time taken and number of retry attempts used when the unicast manager cannot establish 191 // a stream on the open connection between two peers. 192 OnEstablishStreamFailure(duration time.Duration, attempts int) 193 194 // OnDialRetryBudgetUpdated tracks the history of the dial retry budget updates. 195 OnDialRetryBudgetUpdated(budget uint64) 196 197 // OnStreamCreationRetryBudgetUpdated tracks the history of the stream creation retry budget updates. 198 OnStreamCreationRetryBudgetUpdated(budget uint64) 199 200 // OnDialRetryBudgetResetToDefault tracks the number of times the dial retry budget is reset to default. 201 OnDialRetryBudgetResetToDefault() 202 203 // OnStreamCreationRetryBudgetResetToDefault tracks the number of times the stream creation retry budget is reset to default. 204 OnStreamCreationRetryBudgetResetToDefault() 205 } 206 207 type GossipSubMetrics interface { 208 GossipSubScoringMetrics 209 GossipSubRpcInspectorMetrics 210 LocalGossipSubRouterMetrics 211 GossipSubRpcValidationInspectorMetrics 212 } 213 214 type LibP2PMetrics interface { 215 GossipSubMetrics 216 ResolverMetrics 217 DHTMetrics 218 rcmgr.MetricsReporter 219 LibP2PConnectionMetrics 220 UnicastManagerMetrics 221 } 222 223 // GossipSubScoringMetrics encapsulates the metrics collectors for the peer scoring module of GossipSub protocol. 224 // It tracks the scores of the peers in the local mesh and the different factors that contribute to the score of a peer. 225 // It also tracks the scores of the topics in the local mesh and the different factors that contribute to the score of a topic. 226 type GossipSubScoringMetrics interface { 227 // OnOverallPeerScoreUpdated tracks the overall score of peers in the local mesh. 228 OnOverallPeerScoreUpdated(float64) 229 // OnAppSpecificScoreUpdated tracks the application specific score of peers in the local mesh. 230 OnAppSpecificScoreUpdated(float64) 231 // OnIPColocationFactorUpdated tracks the IP colocation factor of peers in the local mesh. 232 OnIPColocationFactorUpdated(float64) 233 // OnBehaviourPenaltyUpdated tracks the behaviour penalty of peers in the local mesh. 234 OnBehaviourPenaltyUpdated(float64) 235 // OnTimeInMeshUpdated tracks the time in mesh factor of peers in the local mesh for a given topic. 236 OnTimeInMeshUpdated(channels.Topic, time.Duration) 237 // OnFirstMessageDeliveredUpdated tracks the first message delivered factor of peers in the local mesh for a given topic. 238 OnFirstMessageDeliveredUpdated(channels.Topic, float64) 239 // OnMeshMessageDeliveredUpdated tracks the mesh message delivered factor of peers in the local mesh for a given topic. 240 OnMeshMessageDeliveredUpdated(channels.Topic, float64) 241 // OnInvalidMessageDeliveredUpdated tracks the invalid message delivered factor of peers in the local mesh for a given topic. 242 OnInvalidMessageDeliveredUpdated(channels.Topic, float64) 243 // SetWarningStateCount tracks the warning score state of peers in the local mesh. It updates the total number of 244 // peers in the local mesh that are in the warning state based on their score. 245 SetWarningStateCount(uint) 246 } 247 248 // GossipSubRpcValidationInspectorMetrics encapsulates the metrics collectors for the gossipsub rpc validation control message inspectors. 249 type GossipSubRpcValidationInspectorMetrics interface { 250 GossipSubRpcInspectorMetrics 251 252 // AsyncProcessingStarted increments the metric tracking the number of inspect message request being processed by workers in the rpc validator worker pool. 253 AsyncProcessingStarted() 254 // AsyncProcessingFinished tracks the time spent by a rpc validation inspector worker to process an inspect message request asynchronously and decrements the metric tracking 255 // the number of inspect message requests being processed asynchronously by the rpc validation inspector workers. 256 AsyncProcessingFinished(duration time.Duration) 257 258 // OnIHaveControlMessageIdsTruncated tracks the number of times message ids on an iHave message were truncated. 259 // Note that this function is called only when the message ids are truncated from an iHave message, not when the iHave message itself is truncated. 260 // This is different from the OnControlMessagesTruncated function which is called when a slice of control messages truncated from an RPC with all their message ids. 261 // Args: 262 // 263 // diff: the number of actual messages truncated. 264 OnIHaveControlMessageIdsTruncated(diff int) 265 266 // OnIWantControlMessageIdsTruncated tracks the number of times message ids on an iWant message were truncated. 267 // Note that this function is called only when the message ids are truncated from an iWant message, not when the iWant message itself is truncated. 268 // This is different from the OnControlMessagesTruncated function which is called when a slice of control messages truncated from an RPC with all their message ids. 269 // Args: 270 // diff: the number of actual messages truncated. 271 OnIWantControlMessageIdsTruncated(diff int) 272 273 // OnControlMessagesTruncated tracks the number of times a slice of control messages is truncated from an RPC with all their included message ids. 274 // Args: 275 // 276 // messageType: the type of the control message that was truncated 277 // diff: the number of control messages truncated. 278 OnControlMessagesTruncated(messageType p2pmsg.ControlMessageType, diff int) 279 280 // OnIWantMessagesInspected tracks the number of duplicate and cache miss message ids received by the node on iWant messages at the end of the async inspection iWants 281 // across one RPC, regardless of the result of the inspection. 282 // 283 // duplicateCount: the total number of duplicate message ids received by the node on the iWant messages at the end of the async inspection of the RPC. 284 // cacheMissCount: the total number of cache miss message ids received by the node on the iWant message at the end of the async inspection of the RPC. 285 OnIWantMessagesInspected(duplicateCount int, cacheMissCount int) 286 287 // OnIWantDuplicateMessageIdsExceedThreshold tracks the number of times that async inspection of iWant messages failed due to the total number of duplicate message ids 288 // received by the node on the iWant messages of a single RPC exceeding the threshold, which results in a misbehaviour report. 289 OnIWantDuplicateMessageIdsExceedThreshold() 290 291 // OnIWantCacheMissMessageIdsExceedThreshold tracks the number of times that async inspection of iWant messages failed due to the total 292 // number of cache miss message ids received by the node on the iWant messages of a single RPC exceeding the threshold, which results in a misbehaviour report. 293 OnIWantCacheMissMessageIdsExceedThreshold() 294 295 // OnIHaveMessagesInspected is called at the end of the async inspection of iHave messages of a single RPC, regardless of the result of the inspection. 296 // It tracks the number of duplicate topic ids and duplicate message ids received by the node on the iHave messages of that single RPC at the end of the async inspection iHaves. 297 // Args: 298 // 299 // duplicateTopicIds: the total number of duplicate topic ids received by the node on the iHave messages at the end of the async inspection of the RPC. 300 // duplicateMessageIds: the number of duplicate message ids received by the node on the iHave messages at the end of the async inspection of the RPC. 301 OnIHaveMessagesInspected(duplicateTopicIds int, duplicateMessageIds int) 302 303 // OnIHaveDuplicateTopicIdsExceedThreshold tracks the number of times that the async inspection of iHave messages of a single RPC failed due to the total number of duplicate topic ids 304 // received by the node on the iHave messages of that RPC exceeding the threshold, which results in a misbehaviour report. 305 OnIHaveDuplicateTopicIdsExceedThreshold() 306 307 // OnIHaveDuplicateMessageIdsExceedThreshold tracks the number of times that the async inspection of iHave messages of a single RPC failed due to the total number of duplicate message ids 308 // received by the node on an iHave message exceeding the threshold, which results in a misbehaviour report. 309 OnIHaveDuplicateMessageIdsExceedThreshold() 310 311 // OnInvalidTopicIdDetectedForControlMessage tracks the number of times that the async inspection of a control message type on a single RPC failed due to an invalid topic id. 312 // Args: 313 // - messageType: the type of the control message that was truncated. 314 OnInvalidTopicIdDetectedForControlMessage(messageType p2pmsg.ControlMessageType) 315 316 // OnActiveClusterIDsNotSetErr tracks the number of times that the async inspection of a control message type on a single RPC failed due to active cluster ids not set inspection failure. 317 // This is not causing a misbehaviour report. 318 OnActiveClusterIDsNotSetErr() 319 320 // OnUnstakedPeerInspectionFailed tracks the number of times that the async inspection of a control message type on a single RPC failed due to unstaked peer inspection failure. 321 // This is not causing a misbehaviour report. 322 OnUnstakedPeerInspectionFailed() 323 324 // OnInvalidControlMessageNotificationSent tracks the number of times that the async inspection of a control message failed and resulted in dissemination of an invalid control message was sent. 325 OnInvalidControlMessageNotificationSent() 326 327 // OnPublishMessagesInspectionErrorExceedsThreshold tracks the number of times that async inspection of publish messages failed due to the number of errors. 328 OnPublishMessagesInspectionErrorExceedsThreshold() 329 330 // OnPruneDuplicateTopicIdsExceedThreshold tracks the number of times that the async inspection of prune messages for an RPC failed due to the number of duplicate topic ids 331 // received by the node on prune messages of the same RPC excesses threshold, which results in a misbehaviour report. 332 OnPruneDuplicateTopicIdsExceedThreshold() 333 334 // OnPruneMessageInspected is called at the end of the async inspection of prune messages of the RPC, regardless of the result of the inspection. 335 // Args: 336 // duplicateTopicIds: the number of duplicate topic ids received by the node on the prune messages of the RPC at the end of the async inspection prunes. 337 OnPruneMessageInspected(duplicateTopicIds int) 338 339 // OnGraftDuplicateTopicIdsExceedThreshold tracks the number of times that the async inspection of the graft messages of a single RPC failed due to the number of duplicate topic ids 340 // received by the node on graft messages of the same RPC excesses threshold, which results in a misbehaviour report. 341 OnGraftDuplicateTopicIdsExceedThreshold() 342 343 // OnGraftMessageInspected is called at the end of the async inspection of graft messages of a single RPC, regardless of the result of the inspection. 344 // Args: 345 // duplicateTopicIds: the number of duplicate topic ids received by the node on the graft messages at the end of the async inspection of a single RPC. 346 OnGraftMessageInspected(duplicateTopicIds int) 347 348 // OnPublishMessageInspected is called at the end of the async inspection of publish messages of a single RPC, regardless of the result of the inspection. 349 // It tracks the total number of errors detected during the async inspection of the rpc together with their individual breakdown. 350 // Args: 351 // - errCount: the number of errors that occurred during the async inspection of publish messages. 352 // - invalidTopicIdsCount: the number of times that an invalid topic id was detected during the async inspection of publish messages. 353 // - invalidSubscriptionsCount: the number of times that an invalid subscription was detected during the async inspection of publish messages. 354 // - invalidSendersCount: the number of times that an invalid sender was detected during the async inspection of publish messages. 355 OnPublishMessageInspected(totalErrCount int, invalidTopicIdsCount int, invalidSubscriptionsCount int, invalidSendersCount int) 356 } 357 358 // NetworkInboundQueueMetrics encapsulates the metrics collectors for the inbound queue of the networking layer. 359 type NetworkInboundQueueMetrics interface { 360 361 // MessageAdded increments the metric tracking the number of messages in the queue with the given priority 362 MessageAdded(priority int) 363 364 // MessageRemoved decrements the metric tracking the number of messages in the queue with the given priority 365 MessageRemoved(priority int) 366 367 // QueueDuration tracks the time spent by a message with the given priority in the queue 368 QueueDuration(duration time.Duration, priority int) 369 } 370 371 // NetworkCoreMetrics encapsulates the metrics collectors for the core networking layer functionality. 372 type NetworkCoreMetrics interface { 373 NetworkInboundQueueMetrics 374 AlspMetrics 375 NetworkSecurityMetrics 376 377 // OutboundMessageSent collects metrics related to a message sent by the node. 378 OutboundMessageSent(sizeBytes int, topic string, protocol string, messageType string) 379 // InboundMessageReceived collects metrics related to a message received by the node. 380 InboundMessageReceived(sizeBytes int, topic string, protocol string, messageType string) 381 // DuplicateInboundMessagesDropped increments the metric tracking the number of duplicate messages dropped by the node. 382 DuplicateInboundMessagesDropped(topic string, protocol string, messageType string) 383 // UnicastMessageSendingStarted increments the metric tracking the number of unicast messages sent by the node. 384 UnicastMessageSendingStarted(topic string) 385 // UnicastMessageSendingCompleted decrements the metric tracking the number of unicast messages sent by the node. 386 UnicastMessageSendingCompleted(topic string) 387 // MessageProcessingStarted increments the metric tracking the number of messages being processed by the node. 388 MessageProcessingStarted(topic string) 389 // MessageProcessingFinished tracks the time spent by the node to process a message and decrements the metric tracking 390 // the number of messages being processed by the node. 391 MessageProcessingFinished(topic string, duration time.Duration) 392 } 393 394 // LibP2PConnectionMetrics encapsulates the metrics collectors for the connection manager of the libp2p node. 395 type LibP2PConnectionMetrics interface { 396 // OutboundConnections updates the metric tracking the number of outbound connections of this node 397 OutboundConnections(connectionCount uint) 398 399 // InboundConnections updates the metric tracking the number of inbound connections of this node 400 InboundConnections(connectionCount uint) 401 } 402 403 // AlspMetrics encapsulates the metrics collectors for the Application Layer Spam Prevention (ALSP) module, which 404 // is part of the networking layer. ALSP is responsible to prevent spam attacks on the application layer messages that 405 // appear to be valid for the networking layer but carry on a malicious intent on the application layer (i.e., Flow protocols). 406 type AlspMetrics interface { 407 // OnMisbehaviorReported is called when a misbehavior is reported by the application layer to ALSP. 408 // An engine detecting a spamming-related misbehavior reports it to the ALSP module. 409 // Args: 410 // - channel: the channel on which the misbehavior was reported 411 // - misbehaviorType: the type of misbehavior reported 412 OnMisbehaviorReported(channel string, misbehaviorType string) 413 } 414 415 // NetworkMetrics is the blanket abstraction that encapsulates the metrics collectors for the networking layer. 416 type NetworkMetrics interface { 417 LibP2PMetrics 418 NetworkCoreMetrics 419 } 420 421 // EngineMetrics is a generic metrics consumer for node-internal data processing 422 // components (aka engines). Implementations must be non-blocking and concurrency safe. 423 type EngineMetrics interface { 424 // MessageSent reports that the engine transmitted the message over the network. 425 // Unicasts, broadcasts, and multicasts are all reported once. 426 MessageSent(engine string, message string) 427 // MessageReceived reports that the engine received the message over the network. 428 MessageReceived(engine string, message string) 429 // MessageHandled reports that the engine has finished processing the message. 430 // Both invalid and valid messages should be reported. 431 // A message must be reported as either handled or dropped, not both. 432 MessageHandled(engine string, messages string) 433 // InboundMessageDropped reports that the engine has dropped inbound message without processing it. 434 // Inbound messages must be reported as either handled or dropped, not both. 435 InboundMessageDropped(engine string, messages string) 436 // OutboundMessageDropped reports that the engine has dropped outbound message without processing it. 437 // Outbound messages must be reported as either sent or dropped, not both. 438 OutboundMessageDropped(engine string, messages string) 439 } 440 441 type ComplianceMetrics interface { 442 FinalizedHeight(height uint64) 443 CommittedEpochFinalView(view uint64) 444 EpochTransitionHeight(height uint64) 445 SealedHeight(height uint64) 446 BlockFinalized(*flow.Block) 447 BlockSealed(*flow.Block) 448 CurrentEpochCounter(counter uint64) 449 CurrentEpochPhase(phase flow.EpochPhase) 450 CurrentEpochFinalView(view uint64) 451 CurrentDKGPhase1FinalView(view uint64) 452 CurrentDKGPhase2FinalView(view uint64) 453 CurrentDKGPhase3FinalView(view uint64) 454 EpochEmergencyFallbackTriggered() 455 } 456 457 type CleanerMetrics interface { 458 RanGC(took time.Duration) 459 } 460 461 type CacheMetrics interface { 462 // CacheEntries report the total number of cached items 463 CacheEntries(resource string, entries uint) 464 // CacheHit report the number of times the queried item is found in the cache 465 CacheHit(resource string) 466 // CacheNotFound records the number of times the queried item was not found in either cache or database. 467 CacheNotFound(resource string) 468 // CacheMiss report the number of times the queried item is not found in the cache, but found in the database. 469 CacheMiss(resource string) 470 } 471 472 type MempoolMetrics interface { 473 MempoolEntries(resource string, entries uint) 474 Register(resource string, entriesFunc EntriesFunc) error 475 } 476 477 type HotstuffMetrics interface { 478 // HotStuffBusyDuration reports Metrics C6 HotStuff Busy Duration 479 HotStuffBusyDuration(duration time.Duration, event string) 480 481 // HotStuffIdleDuration reports Metrics C6 HotStuff Idle Duration 482 HotStuffIdleDuration(duration time.Duration) 483 484 // HotStuffWaitDuration reports Metrics C6 HotStuff Idle Duration - the time between receiving and 485 // enqueueing a message to beginning to process that message. 486 HotStuffWaitDuration(duration time.Duration, event string) 487 488 // SetCurView reports Metrics C8: Current View maintained by Pacemaker. 489 SetCurView(view uint64) 490 491 // SetQCView reports Metrics C9: View of the newest QC known to Pacemaker. 492 SetQCView(view uint64) 493 494 // SetTCView reports last TC known to Pacemaker. 495 SetTCView(view uint64) 496 497 // CountSkipped counts the number of skips we did. 498 CountSkipped() 499 500 // CountTimeout tracks the number of views that this replica left due to observing a TC. 501 CountTimeout() 502 503 // SetTimeout sets the current timeout duration 504 SetTimeout(duration time.Duration) 505 506 // BlockProcessingDuration measures the time which the compliance engine 507 // spends to process one block proposal. 508 BlockProcessingDuration(duration time.Duration) 509 510 // VoteProcessingDuration measures the time which the hotstuff.VoteAggregator 511 // spends to process one vote. 512 VoteProcessingDuration(duration time.Duration) 513 514 // TimeoutObjectProcessingDuration measures the time which the hotstuff.TimeoutAggregator 515 // spends to process one timeout object. 516 TimeoutObjectProcessingDuration(duration time.Duration) 517 518 // CommitteeProcessingDuration measures the time which the HotStuff's core logic 519 // spends in the hotstuff.Replicas component, i.e. the time determining consensus 520 // committee relations. 521 CommitteeProcessingDuration(duration time.Duration) 522 523 // SignerProcessingDuration measures the time which the HotStuff's core logic 524 // spends in the hotstuff.Signer component, i.e. the with crypto-related operations. 525 SignerProcessingDuration(duration time.Duration) 526 527 // ValidatorProcessingDuration measures the time which the HotStuff's core logic 528 // spends in the hotstuff.Validator component, i.e. the with verifying 529 // consensus messages. 530 ValidatorProcessingDuration(duration time.Duration) 531 532 // PayloadProductionDuration measures the time which the HotStuff's core logic 533 // spends in the module.Builder component, i.e. the with generating block payloads. 534 PayloadProductionDuration(duration time.Duration) 535 536 // TimeoutCollectorsRange collects information from the node's `TimeoutAggregator` component. 537 // Specifically, it measurers the number of views for which we are currently collecting timeouts 538 // (i.e. the number of `TimeoutCollector` instances we are maintaining) and their lowest/highest view. 539 TimeoutCollectorsRange(lowestRetainedView uint64, newestViewCreatedCollector uint64, activeCollectors int) 540 } 541 542 type CruiseCtlMetrics interface { 543 544 // PIDError measures the current error values for the proportional, integration, 545 // and derivative terms of the PID controller. 546 PIDError(p, i, d float64) 547 548 // TargetProposalDuration measures the current value of the Block Time Controller output: 549 // the target duration from parent to child proposal. 550 TargetProposalDuration(duration time.Duration) 551 552 // ControllerOutput measures the output of the cruise control PID controller. 553 // Concretely, this is the quantity to subtract from the baseline view duration. 554 ControllerOutput(duration time.Duration) 555 } 556 557 type CollectionMetrics interface { 558 // TransactionIngested is called when a new transaction is ingested by the 559 // node. It increments the total count of ingested transactions and starts 560 // a tx->col span for the transaction. 561 TransactionIngested(txID flow.Identifier) 562 563 // ClusterBlockProposed is called when a new collection is proposed by us or 564 // any other node in the cluster. 565 ClusterBlockProposed(block *cluster.Block) 566 567 // ClusterBlockFinalized is called when a collection is finalized. 568 ClusterBlockFinalized(block *cluster.Block) 569 } 570 571 type ConsensusMetrics interface { 572 // StartCollectionToFinalized reports Metrics C1: Collection Received by CCL→ Collection Included in Finalized Block 573 StartCollectionToFinalized(collectionID flow.Identifier) 574 575 // FinishCollectionToFinalized reports Metrics C1: Collection Received by CCL→ Collection Included in Finalized Block 576 FinishCollectionToFinalized(collectionID flow.Identifier) 577 578 // StartBlockToSeal reports Metrics C4: Block Received by CCL → Block Seal in finalized block 579 StartBlockToSeal(blockID flow.Identifier) 580 581 // FinishBlockToSeal reports Metrics C4: Block Received by CCL → Block Seal in finalized block 582 FinishBlockToSeal(blockID flow.Identifier) 583 584 // EmergencySeal increments the number of seals that were created in emergency mode 585 EmergencySeal() 586 587 // OnReceiptProcessingDuration records the number of seconds spent processing a receipt 588 OnReceiptProcessingDuration(duration time.Duration) 589 590 // OnApprovalProcessingDuration records the number of seconds spent processing an approval 591 OnApprovalProcessingDuration(duration time.Duration) 592 593 // CheckSealingDuration records absolute time for the full sealing check by the consensus match engine 594 CheckSealingDuration(duration time.Duration) 595 } 596 597 type VerificationMetrics interface { 598 // OnBlockConsumerJobDone is invoked by block consumer whenever it is notified a job is done by a worker. It 599 // sets the last processed block job index. 600 OnBlockConsumerJobDone(uint64) 601 // OnChunkConsumerJobDone is invoked by chunk consumer whenever it is notified a job is done by a worker. It 602 // sets the last processed chunk job index. 603 OnChunkConsumerJobDone(uint64) 604 // OnExecutionResultReceivedAtAssignerEngine is called whenever a new execution result arrives 605 // at Assigner engine. It increments total number of received execution results. 606 OnExecutionResultReceivedAtAssignerEngine() 607 608 // OnVerifiableChunkReceivedAtVerifierEngine increments a counter that keeps track of number of verifiable chunks received at 609 // verifier engine from fetcher engine. 610 OnVerifiableChunkReceivedAtVerifierEngine() 611 612 // OnFinalizedBlockArrivedAtAssigner sets a gauge that keeps track of number of the latest block height arrives 613 // at assigner engine. Note that it assumes blocks are coming to assigner engine in strictly increasing order of their height. 614 OnFinalizedBlockArrivedAtAssigner(height uint64) 615 616 // OnChunksAssignmentDoneAtAssigner increments a counter that keeps track of the total number of assigned chunks to 617 // the verification node. 618 OnChunksAssignmentDoneAtAssigner(chunks int) 619 620 // OnAssignedChunkProcessedAtAssigner increments a counter that keeps track of the total number of assigned chunks pushed by 621 // assigner engine to the fetcher engine. 622 OnAssignedChunkProcessedAtAssigner() 623 624 // OnAssignedChunkReceivedAtFetcher increments a counter that keeps track of number of assigned chunks arrive at fetcher engine. 625 OnAssignedChunkReceivedAtFetcher() 626 627 // OnChunkDataPackRequestSentByFetcher increments a counter that keeps track of number of chunk data pack requests that fetcher engine 628 // sends to requester engine. 629 OnChunkDataPackRequestSentByFetcher() 630 631 // OnChunkDataPackRequestReceivedByRequester increments a counter that keeps track of number of chunk data pack requests 632 // arrive at the requester engine from the fetcher engine. 633 OnChunkDataPackRequestReceivedByRequester() 634 635 // OnChunkDataPackRequestDispatchedInNetwork increments a counter that keeps track of number of chunk data pack requests that the 636 // requester engine dispatches in the network (to the execution nodes). 637 OnChunkDataPackRequestDispatchedInNetworkByRequester() 638 639 // OnChunkDataPackResponseReceivedFromNetwork increments a counter that keeps track of number of chunk data pack responses that the 640 // requester engine receives from execution nodes (through network). 641 OnChunkDataPackResponseReceivedFromNetworkByRequester() 642 643 // SetMaxChunkDataPackAttemptsForNextUnsealedHeightAtRequester is invoked when a cycle of requesting chunk data packs is done by requester engine. 644 // It updates the maximum number of attempts made by requester engine for requesting the chunk data packs of the next unsealed height. 645 // The maximum is taken over the history of all chunk data packs requested during that cycle that belong to the next unsealed height. 646 SetMaxChunkDataPackAttemptsForNextUnsealedHeightAtRequester(attempts uint64) 647 648 // OnChunkDataPackSentToFetcher increments a counter that keeps track of number of chunk data packs sent to the fetcher engine from 649 // requester engine. 650 OnChunkDataPackSentToFetcher() 651 652 // OnChunkDataPackArrivedAtFetcher increments a counter that keeps track of number of chunk data packs arrived at fetcher engine from 653 // requester engine. 654 OnChunkDataPackArrivedAtFetcher() 655 656 // OnVerifiableChunkSentToVerifier increments a counter that keeps track of number of verifiable chunks fetcher engine sent to verifier engine. 657 OnVerifiableChunkSentToVerifier() 658 659 // OnResultApprovalDispatchedInNetwork increments a counter that keeps track of number of result approvals dispatched in the network 660 // by verifier engine. 661 OnResultApprovalDispatchedInNetworkByVerifier() 662 } 663 664 // LedgerMetrics provides an interface to record Ledger Storage metrics. 665 // Ledger storage is non-linear (fork-aware) so certain metrics are averaged 666 // and computed before emitting for better visibility 667 type LedgerMetrics interface { 668 // ForestApproxMemorySize records approximate memory usage of forest (all in-memory trees) 669 ForestApproxMemorySize(bytes uint64) 670 671 // ForestNumberOfTrees current number of trees in a forest (in memory) 672 ForestNumberOfTrees(number uint64) 673 674 // LatestTrieRegCount records the number of unique register allocated (the latest created trie) 675 LatestTrieRegCount(number uint64) 676 677 // LatestTrieRegCountDiff records the difference between the number of unique register allocated of the latest created trie and parent trie 678 LatestTrieRegCountDiff(number int64) 679 680 // LatestTrieRegSize records the size of unique register allocated (the latest created trie) 681 LatestTrieRegSize(size uint64) 682 683 // LatestTrieRegSizeDiff records the difference between the size of unique register allocated of the latest created trie and parent trie 684 LatestTrieRegSizeDiff(size int64) 685 686 // LatestTrieMaxDepthTouched records the maximum depth touched of the lastest created trie 687 LatestTrieMaxDepthTouched(maxDepth uint16) 688 689 // UpdateCount increase a counter of performed updates 690 UpdateCount() 691 692 // ProofSize records a proof size 693 ProofSize(bytes uint32) 694 695 // UpdateValuesNumber accumulates number of updated values 696 UpdateValuesNumber(number uint64) 697 698 // UpdateValuesSize total size (in bytes) of updates values 699 UpdateValuesSize(byte uint64) 700 701 // UpdateDuration records absolute time for the update of a trie 702 UpdateDuration(duration time.Duration) 703 704 // UpdateDurationPerItem records update time for single value (total duration / number of updated values) 705 UpdateDurationPerItem(duration time.Duration) 706 707 // ReadValuesNumber accumulates number of read values 708 ReadValuesNumber(number uint64) 709 710 // ReadValuesSize total size (in bytes) of read values 711 ReadValuesSize(byte uint64) 712 713 // ReadDuration records absolute time for the read from a trie 714 ReadDuration(duration time.Duration) 715 716 // ReadDurationPerItem records read time for single value (total duration / number of read values) 717 ReadDurationPerItem(duration time.Duration) 718 } 719 720 type WALMetrics interface { 721 // ExecutionCheckpointSize reports the size of a checkpoint in bytes 722 ExecutionCheckpointSize(bytes uint64) 723 } 724 725 type RateLimitedBlockstoreMetrics interface { 726 BytesRead(int) 727 } 728 729 type BitswapMetrics interface { 730 Peers(prefix string, n int) 731 Wantlist(prefix string, n int) 732 BlobsReceived(prefix string, n uint64) 733 DataReceived(prefix string, n uint64) 734 BlobsSent(prefix string, n uint64) 735 DataSent(prefix string, n uint64) 736 DupBlobsReceived(prefix string, n uint64) 737 DupDataReceived(prefix string, n uint64) 738 MessagesReceived(prefix string, n uint64) 739 } 740 741 type ExecutionDataRequesterMetrics interface { 742 // ExecutionDataFetchStarted records an in-progress download 743 ExecutionDataFetchStarted() 744 745 // ExecutionDataFetchFinished records a completed download 746 ExecutionDataFetchFinished(duration time.Duration, success bool, height uint64) 747 748 // NotificationSent reports that ExecutionData received notifications were sent for a block height 749 NotificationSent(height uint64) 750 751 // FetchRetried reports that a download retry was processed 752 FetchRetried() 753 } 754 755 type ExecutionStateIndexerMetrics interface { 756 // BlockIndexed records metrics from indexing execution data from a single block. 757 BlockIndexed(height uint64, duration time.Duration, events, registers, transactionResults int) 758 759 // BlockReindexed records that a previously indexed block was indexed again. 760 BlockReindexed() 761 762 // InitializeLatestHeight records the latest height that has been indexed. 763 // This should only be used during startup. After startup, use BlockIndexed to record newly 764 // indexed heights. 765 InitializeLatestHeight(height uint64) 766 } 767 768 type RuntimeMetrics interface { 769 // RuntimeTransactionParsed reports the time spent parsing a single transaction 770 RuntimeTransactionParsed(dur time.Duration) 771 772 // RuntimeTransactionChecked reports the time spent checking a single transaction 773 RuntimeTransactionChecked(dur time.Duration) 774 775 // RuntimeTransactionInterpreted reports the time spent interpreting a single transaction 776 RuntimeTransactionInterpreted(dur time.Duration) 777 778 // RuntimeSetNumberOfAccounts Sets the total number of accounts on the network 779 RuntimeSetNumberOfAccounts(count uint64) 780 781 // RuntimeTransactionProgramsCacheMiss reports a programs cache miss 782 // during transaction execution 783 RuntimeTransactionProgramsCacheMiss() 784 785 // RuntimeTransactionProgramsCacheHit reports a programs cache hit 786 // during transaction execution 787 RuntimeTransactionProgramsCacheHit() 788 } 789 790 type ProviderMetrics interface { 791 // ChunkDataPackRequestProcessed is executed every time a chunk data pack request is picked up for processing at execution node. 792 // It increases the request processed counter by one. 793 ChunkDataPackRequestProcessed() 794 } 795 796 type ExecutionDataProviderMetrics interface { 797 RootIDComputed(duration time.Duration, numberOfChunks int) 798 AddBlobsSucceeded(duration time.Duration, totalSize uint64) 799 AddBlobsFailed() 800 } 801 802 type ExecutionDataRequesterV2Metrics interface { 803 FulfilledHeight(blockHeight uint64) 804 ReceiptSkipped() 805 RequestSucceeded(blockHeight uint64, duration time.Duration, totalSize uint64, numberOfAttempts int) 806 RequestFailed(duration time.Duration, retryable bool) 807 RequestCanceled() 808 ResponseDropped() 809 } 810 811 type ExecutionDataPrunerMetrics interface { 812 Pruned(height uint64, duration time.Duration) 813 } 814 815 type RestMetrics interface { 816 // Example recorder taken from: 817 // https://github.com/slok/go-http-metrics/blob/master/metrics/prometheus/prometheus.go 818 httpmetrics.Recorder 819 AddTotalRequests(ctx context.Context, method string, routeName string) 820 } 821 822 type GRPCConnectionPoolMetrics interface { 823 // TotalConnectionsInPool updates the number connections to collection/execution nodes stored in the pool, and the size of the pool 824 TotalConnectionsInPool(connectionCount uint, connectionPoolSize uint) 825 826 // ConnectionFromPoolReused tracks the number of times a connection to a collection/execution node is reused from the connection pool 827 ConnectionFromPoolReused() 828 829 // ConnectionAddedToPool tracks the number of times a collection/execution node is added to the connection pool 830 ConnectionAddedToPool() 831 832 // NewConnectionEstablished tracks the number of times a new grpc connection is established 833 NewConnectionEstablished() 834 835 // ConnectionFromPoolInvalidated tracks the number of times a cached grpc connection is invalidated and closed 836 ConnectionFromPoolInvalidated() 837 838 // ConnectionFromPoolUpdated tracks the number of times a cached connection is updated 839 ConnectionFromPoolUpdated() 840 841 // ConnectionFromPoolEvicted tracks the number of times a cached connection is evicted from the cache 842 ConnectionFromPoolEvicted() 843 } 844 845 type AccessMetrics interface { 846 RestMetrics 847 GRPCConnectionPoolMetrics 848 TransactionMetrics 849 BackendScriptsMetrics 850 851 // UpdateExecutionReceiptMaxHeight is called whenever we store an execution receipt from a block from a newer height 852 UpdateExecutionReceiptMaxHeight(height uint64) 853 854 // UpdateLastFullBlockHeight tracks the height of the last block for which all collections were received 855 UpdateLastFullBlockHeight(height uint64) 856 } 857 858 type ExecutionResultStats struct { 859 ComputationUsed uint64 860 MemoryUsed uint64 861 EventCounts int 862 EventSize int 863 NumberOfRegistersTouched int 864 NumberOfBytesWrittenToRegisters int 865 NumberOfCollections int 866 NumberOfTransactions int 867 } 868 869 func (stats *ExecutionResultStats) Merge(other ExecutionResultStats) { 870 stats.ComputationUsed += other.ComputationUsed 871 stats.MemoryUsed += other.MemoryUsed 872 stats.EventCounts += other.EventCounts 873 stats.EventSize += other.EventSize 874 stats.NumberOfRegistersTouched += other.NumberOfRegistersTouched 875 stats.NumberOfBytesWrittenToRegisters += other.NumberOfBytesWrittenToRegisters 876 stats.NumberOfCollections += other.NumberOfCollections 877 stats.NumberOfTransactions += other.NumberOfTransactions 878 } 879 880 type ExecutionMetrics interface { 881 LedgerMetrics 882 RuntimeMetrics 883 ProviderMetrics 884 WALMetrics 885 886 // StartBlockReceivedToExecuted starts a span to trace the duration of a block 887 // from being received for execution to execution being finished 888 StartBlockReceivedToExecuted(blockID flow.Identifier) 889 890 // FinishBlockReceivedToExecuted finishes a span to trace the duration of a block 891 // from being received for execution to execution being finished 892 FinishBlockReceivedToExecuted(blockID flow.Identifier) 893 894 // ExecutionStorageStateCommitment reports the storage size of a state commitment in bytes 895 ExecutionStorageStateCommitment(bytes int64) 896 897 // ExecutionLastExecutedBlockHeight reports last executed block height 898 ExecutionLastExecutedBlockHeight(height uint64) 899 900 // ExecutionLastFinalizedExecutedBlockHeight reports last finalized and executed block height 901 ExecutionLastFinalizedExecutedBlockHeight(height uint64) 902 903 // ExecutionBlockExecuted reports the total time and computation spent on executing a block 904 ExecutionBlockExecuted(dur time.Duration, stats ExecutionResultStats) 905 906 // ExecutionBlockExecutionEffortVectorComponent reports the unweighted effort of given ComputationKind at block level 907 ExecutionBlockExecutionEffortVectorComponent(string, uint) 908 909 // ExecutionBlockCachedPrograms reports the number of cached programs at the end of a block 910 ExecutionBlockCachedPrograms(programs int) 911 912 // ExecutionCollectionExecuted reports the total time and computation spent on executing a collection 913 ExecutionCollectionExecuted(dur time.Duration, stats ExecutionResultStats) 914 915 // ExecutionTransactionExecuted reports stats on executing a single transaction 916 ExecutionTransactionExecuted( 917 dur time.Duration, 918 numTxnConflictRetries int, 919 compUsed uint64, 920 memoryUsed uint64, 921 eventCounts int, 922 eventSize int, 923 failed bool) 924 925 // ExecutionChunkDataPackGenerated reports stats on chunk data pack generation 926 ExecutionChunkDataPackGenerated(proofSize, numberOfTransactions int) 927 928 // ExecutionScriptExecuted reports the time and memory spent on executing an script 929 ExecutionScriptExecuted(dur time.Duration, compUsed, memoryUsed, memoryEstimate uint64) 930 931 // ExecutionCollectionRequestSent reports when a request for a collection is sent to a collection node 932 ExecutionCollectionRequestSent() 933 934 // Unused 935 ExecutionCollectionRequestRetried() 936 937 // ExecutionSync reports when the state syncing is triggered or stopped. 938 ExecutionSync(syncing bool) 939 940 // Upload metrics 941 ExecutionBlockDataUploadStarted() 942 ExecutionBlockDataUploadFinished(dur time.Duration) 943 ExecutionComputationResultUploaded() 944 ExecutionComputationResultUploadRetried() 945 946 UpdateCollectionMaxHeight(height uint64) 947 } 948 949 type BackendScriptsMetrics interface { 950 // ScriptExecuted records the round trip time while executing a script 951 ScriptExecuted(dur time.Duration, size int) 952 953 // ScriptExecutionErrorLocal records script execution failures from local execution 954 ScriptExecutionErrorLocal() 955 956 // ScriptExecutionErrorOnExecutionNode records script execution failures on Execution Nodes 957 ScriptExecutionErrorOnExecutionNode() 958 959 // ScriptExecutionResultMismatch records script execution result mismatches between local and 960 // execution nodes 961 ScriptExecutionResultMismatch() 962 963 // ScriptExecutionResultMatch records script execution result matches between local and 964 // execution nodes 965 ScriptExecutionResultMatch() 966 967 // ScriptExecutionErrorMismatch records script execution error mismatches between local and 968 // execution nodes 969 ScriptExecutionErrorMismatch() 970 971 // ScriptExecutionErrorMatch records script execution error matches between local and 972 // execution nodes 973 ScriptExecutionErrorMatch() 974 975 // ScriptExecutionNotIndexed records script execution matches where data for the block is not 976 // indexed locally yet 977 ScriptExecutionNotIndexed() 978 } 979 980 type TransactionMetrics interface { 981 // Record the round trip time while getting a transaction result 982 TransactionResultFetched(dur time.Duration, size int) 983 984 // TransactionReceived starts tracking of transaction execution/finalization/sealing 985 TransactionReceived(txID flow.Identifier, when time.Time) 986 987 // TransactionFinalized reports the time spent between the transaction being received and finalized. Reporting only 988 // works if the transaction was earlier added as received. 989 TransactionFinalized(txID flow.Identifier, when time.Time) 990 991 // TransactionExecuted reports the time spent between the transaction being received and executed. Reporting only 992 // works if the transaction was earlier added as received. 993 TransactionExecuted(txID flow.Identifier, when time.Time) 994 995 // TransactionExpired tracks number of expired transactions 996 TransactionExpired(txID flow.Identifier) 997 998 // TransactionSubmissionFailed should be called whenever we try to submit a transaction and it fails 999 TransactionSubmissionFailed() 1000 } 1001 1002 type PingMetrics interface { 1003 // NodeReachable tracks the round trip time in milliseconds taken to ping a node 1004 // The nodeInfo provides additional information about the node such as the name of the node operator 1005 NodeReachable(node *flow.Identity, nodeInfo string, rtt time.Duration) 1006 1007 // NodeInfo tracks the software version, sealed height and hotstuff view of a node 1008 NodeInfo(node *flow.Identity, nodeInfo string, version string, sealedHeight uint64, hotstuffCurView uint64) 1009 } 1010 1011 type HeroCacheMetrics interface { 1012 // BucketAvailableSlots keeps track of number of available slots in buckets of cache. 1013 BucketAvailableSlots(uint64, uint64) 1014 1015 // OnKeyPutAttempt is called whenever a new (key, value) pair is attempted to be put in cache. 1016 // It does not reflect whether the put was successful or not. 1017 // A (key, value) pair put attempt may fail if the cache is full, or the key already exists. 1018 OnKeyPutAttempt(size uint32) 1019 1020 // OnKeyPutSuccess is called whenever a new (key, entity) pair is successfully added to the cache. 1021 OnKeyPutSuccess(size uint32) 1022 1023 // OnKeyPutDrop is called whenever a new (key, entity) pair is dropped from the cache due to full cache. 1024 OnKeyPutDrop() 1025 1026 // OnKeyPutDeduplicated is tracking the total number of unsuccessful writes caused by adding a duplicate key to the cache. 1027 // A duplicate key is dropped by the cache when it is written to the cache. 1028 // Note: in context of HeroCache, the key corresponds to the identifier of its entity. Hence, a duplicate key corresponds to 1029 // a duplicate entity. 1030 OnKeyPutDeduplicated() 1031 1032 // OnKeyRemoved is called whenever a (key, entity) pair is removed from the cache. 1033 OnKeyRemoved(size uint32) 1034 1035 // OnKeyGetSuccess tracks total number of successful read queries. 1036 // A read query is successful if the entity corresponding to its key is available in the cache. 1037 // Note: in context of HeroCache, the key corresponds to the identifier of its entity. 1038 OnKeyGetSuccess() 1039 1040 // OnKeyGetFailure tracks total number of unsuccessful read queries. 1041 // A read query is unsuccessful if the entity corresponding to its key is not available in the cache. 1042 // Note: in context of HeroCache, the key corresponds to the identifier of its entity. 1043 OnKeyGetFailure() 1044 1045 // OnEntityEjectionDueToFullCapacity is called whenever adding a new (key, entity) to the cache results in ejection of another (key', entity') pair. 1046 // This normally happens -- and is expected -- when the cache is full. 1047 // Note: in context of HeroCache, the key corresponds to the identifier of its entity. 1048 OnEntityEjectionDueToFullCapacity() 1049 1050 // OnEntityEjectionDueToEmergency is called whenever a bucket is found full and all of its keys are valid, i.e., 1051 // each key belongs to an existing (key, entity) pair. 1052 // Hence, adding a new key to that bucket will replace the oldest valid key inside that bucket. 1053 // Note: in context of HeroCache, the key corresponds to the identifier of its entity. 1054 OnEntityEjectionDueToEmergency() 1055 } 1056 1057 type ChainSyncMetrics interface { 1058 // record pruned blocks. requested and received times might be zero values 1059 PrunedBlockById(status *chainsync.Status) 1060 1061 PrunedBlockByHeight(status *chainsync.Status) 1062 1063 // totalByHeight and totalById are the number of blocks pruned for blocks requested by height and by id 1064 // storedByHeight and storedById are the number of blocks still stored by height and id 1065 PrunedBlocks(totalByHeight, totalById, storedByHeight, storedById int) 1066 1067 RangeRequested(ran chainsync.Range) 1068 1069 BatchRequested(batch chainsync.Batch) 1070 } 1071 1072 type DHTMetrics interface { 1073 RoutingTablePeerAdded() 1074 RoutingTablePeerRemoved() 1075 } 1076 1077 type CollectionExecutedMetric interface { 1078 CollectionFinalized(light flow.LightCollection) 1079 CollectionExecuted(light flow.LightCollection) 1080 BlockFinalized(block *flow.Block) 1081 ExecutionReceiptReceived(r *flow.ExecutionReceipt) 1082 UpdateLastFullBlockHeight(height uint64) 1083 }