github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/module/metrics/gossipsub.go (about) 1 package metrics 2 3 import ( 4 "github.com/prometheus/client_golang/prometheus" 5 "github.com/prometheus/client_golang/prometheus/promauto" 6 7 "github.com/onflow/flow-go/module" 8 ) 9 10 // LocalGossipSubRouterMetrics encapsulates the metrics collectors for GossipSub router of the local node. 11 // It gives a lens into the local node's view of the GossipSub protocol. 12 type LocalGossipSubRouterMetrics struct { 13 // localMeshSize is the number of peers in the local mesh of the node on each topic. 14 localMeshSize prometheus.GaugeVec 15 16 // peerAddedOnProtocolCount is the number of peers added to the local gossipsub router on a gossipsub protocol. 17 peerAddedOnProtocolCount prometheus.CounterVec 18 19 // peerRemovedFromProtocolCount is the number of peers removed from the local gossipsub router (i.e., blacklisted or unavailable). 20 peerRemovedFromProtocolCount prometheus.Counter 21 22 // localPeerJoinedTopicCount is the number of times the local node joined (i.e., subscribed) to a topic. 23 localPeerJoinedTopicCount prometheus.Counter 24 25 // localPeerLeftTopicCount is the number of times the local node left (i.e., unsubscribed) from a topic. 26 localPeerLeftTopicCount prometheus.Counter 27 28 // peerGraftTopicCount is the number of peers grafted to a topic on the local mesh of the node, i.e., the local node 29 // is directly connected to the peer on the topic, and exchange messages directly. 30 peerGraftTopicCount prometheus.CounterVec 31 32 // peerPruneTopicCount is the number of peers pruned from a topic on the local mesh of the node, i.e., the local node 33 // is no longer directly connected to the peer on the topic, and exchange messages indirectly. 34 peerPruneTopicCount prometheus.CounterVec 35 36 // messageEnteredValidationCount is the number of incoming pubsub messages entered internal validation pipeline of gossipsub. 37 messageEnteredValidationCount prometheus.Counter 38 39 // messageDeliveredSize is the size of messages delivered to all subscribers of the topic. 40 messageDeliveredSize prometheus.Histogram 41 42 // messageRejectedSize is the size of inbound messages rejected by the validation pipeline; the rejection reason is also included. 43 messageRejectedSize prometheus.HistogramVec 44 45 // messageDuplicateSize is the size of messages that are duplicates of already received messages. 46 messageDuplicateSize prometheus.Histogram 47 48 // peerThrottledCount is the number of peers that are throttled by the local node, i.e., the local node is not accepting 49 // any pubsub message from the peer but may still accept control messages. 50 peerThrottledCount prometheus.Counter 51 52 // rpcRcvCount is the number of rpc messages received and processed by the router (i.e., passed rpc inspection). 53 rpcRcvCount prometheus.Counter 54 55 // iWantRcvCount is the number of iwant messages received by the router on rpcs. 56 iWantRcvCount prometheus.Counter 57 58 // iHaveRcvCount is the number of ihave messages received by the router on rpcs. 59 iHaveRcvCount prometheus.Counter 60 61 // graftRcvCount is the number of graft messages received by the router on rpcs. 62 graftRcvCount prometheus.Counter 63 64 // pruneRcvCount is the number of prune messages received by the router on rpcs. 65 pruneRcvCount prometheus.Counter 66 67 // pubsubMsgRcvCount is the number of pubsub messages received by the router. 68 pubsubMsgRcvCount prometheus.Counter 69 70 // rpcSentCount is the number of rpc messages sent by the router. 71 rpcSentCount prometheus.Counter 72 73 // iWantSentCount is the number of iwant messages sent by the router on rpcs. 74 iWantSentCount prometheus.Counter 75 76 // iHaveSentCount is the number of ihave messages sent by the router on rpcs. 77 iHaveSentCount prometheus.Counter 78 79 // graftSentCount is the number of graft messages sent by the router on rpcs. 80 graftSentCount prometheus.Counter 81 82 // pruneSentCount is the number of prune messages sent by the router on rpcs. 83 pruneSentCount prometheus.Counter 84 85 // pubsubMsgSentCount is the number of pubsub messages sent by the router. 86 pubsubMsgSentCount prometheus.Counter 87 88 // outboundRpcDroppedCount is the number of outbound rpc messages dropped, typically because the outbound message queue is full. 89 outboundRpcDroppedCount prometheus.Counter 90 91 // undeliveredOutboundMessageCount is the number of undelivered messages, i.e., messages that are not delivered to at least one subscriber. 92 undeliveredOutboundMessageCount prometheus.Counter 93 } 94 95 func NewGossipSubLocalMeshMetrics(prefix string) *LocalGossipSubRouterMetrics { 96 return &LocalGossipSubRouterMetrics{ 97 localMeshSize: *promauto.NewGaugeVec( 98 prometheus.GaugeOpts{ 99 Namespace: namespaceNetwork, 100 Subsystem: subsystemGossip, 101 Name: prefix + "gossipsub_local_mesh_size", 102 Help: "number of peers in the local mesh of the node", 103 }, 104 []string{LabelChannel}, 105 ), 106 peerAddedOnProtocolCount: *promauto.NewCounterVec(prometheus.CounterOpts{ 107 Namespace: namespaceNetwork, 108 Subsystem: subsystemGossip, 109 Name: prefix + "gossipsub_added_peer_on_protocol_total", 110 Help: "number of peers added to the local gossipsub router on a gossipsub protocol", 111 }, []string{LabelProtocol}), 112 peerRemovedFromProtocolCount: prometheus.NewCounter(prometheus.CounterOpts{ 113 Namespace: namespaceNetwork, 114 Subsystem: subsystemGossip, 115 Name: prefix + "gossipsub_removed_peer_total", 116 Help: "number of peers removed from the local gossipsub router on a gossipsub protocol due to unavailability or blacklisting", 117 }), 118 localPeerJoinedTopicCount: prometheus.NewCounter(prometheus.CounterOpts{ 119 Namespace: namespaceNetwork, 120 Subsystem: subsystemGossip, 121 Name: prefix + "gossipsub_joined_topic_total", 122 Help: "number of times the local node joined (i.e., subscribed) to a topic", 123 }), 124 localPeerLeftTopicCount: prometheus.NewCounter(prometheus.CounterOpts{ 125 Namespace: namespaceNetwork, 126 Subsystem: subsystemGossip, 127 Name: prefix + "gossipsub_left_topic_total", 128 Help: "number of times the local node left (i.e., unsubscribed) from a topic", 129 }), 130 peerGraftTopicCount: *promauto.NewCounterVec(prometheus.CounterOpts{ 131 Namespace: namespaceNetwork, 132 Subsystem: subsystemGossip, 133 Name: prefix + "gossipsub_graft_topic_total", 134 Help: "number of peers grafted to a topic on the local mesh of the node", 135 }, []string{LabelChannel}), 136 peerPruneTopicCount: *promauto.NewCounterVec(prometheus.CounterOpts{ 137 Namespace: namespaceNetwork, 138 Subsystem: subsystemGossip, 139 Name: prefix + "gossipsub_prune_topic_total", 140 Help: "number of peers pruned from a topic on the local mesh of the node", 141 }, []string{LabelChannel}), 142 messageEnteredValidationCount: prometheus.NewCounter(prometheus.CounterOpts{ 143 Namespace: namespaceNetwork, 144 Subsystem: subsystemGossip, 145 Name: prefix + "gossipsub_message_entered_validation_total", 146 Help: "number of messages entered internal validation pipeline of gossipsub", 147 }), 148 messageDeliveredSize: prometheus.NewHistogram(prometheus.HistogramOpts{ 149 Namespace: namespaceNetwork, 150 Subsystem: subsystemGossip, 151 Buckets: []float64{KiB, 100 * KiB, 1 * MiB}, 152 Name: prefix + "gossipsub_message_delivered_size", 153 Help: "size of messages delivered to all subscribers of the topic", 154 }), 155 messageRejectedSize: *promauto.NewHistogramVec(prometheus.HistogramOpts{ 156 Namespace: namespaceNetwork, 157 Subsystem: subsystemGossip, 158 Name: prefix + "gossipsub_message_rejected_size_bytes", 159 Help: "size of messages rejected by the validation pipeline", 160 }, []string{LabelRejectionReason}), 161 messageDuplicateSize: prometheus.NewHistogram(prometheus.HistogramOpts{ 162 Namespace: namespaceNetwork, 163 Subsystem: subsystemGossip, 164 Buckets: []float64{KiB, 100 * KiB, 1 * MiB}, 165 Name: prefix + "gossipsub_duplicate_message_size_bytes", 166 Help: "size of messages that are duplicates of already received messages", 167 }), 168 peerThrottledCount: prometheus.NewCounter(prometheus.CounterOpts{ 169 Namespace: namespaceNetwork, 170 Subsystem: subsystemGossip, 171 Name: prefix + "gossipsub_peer_throttled_total", 172 Help: "number of peers that are throttled by the local node, i.e., the local node is not accepting any pubsub message from the peer but may still accept control messages", 173 }), 174 rpcRcvCount: prometheus.NewCounter(prometheus.CounterOpts{ 175 Namespace: namespaceNetwork, 176 Subsystem: subsystemGossip, 177 Name: prefix + "gossipsub_rpc_received_total", 178 Help: "number of rpc messages received and processed by the router (i.e., passed rpc inspection)", 179 }), 180 rpcSentCount: prometheus.NewCounter(prometheus.CounterOpts{ 181 Namespace: namespaceNetwork, 182 Subsystem: subsystemGossip, 183 Name: prefix + "gossipsub_rpc_sent_total", 184 Help: "number of rpc messages sent by the router", 185 }), 186 outboundRpcDroppedCount: prometheus.NewCounter(prometheus.CounterOpts{ 187 Namespace: namespaceNetwork, 188 Subsystem: subsystemGossip, 189 Name: prefix + "gossipsub_rpc_dropped_total", 190 Help: "number of outbound rpc messages dropped, typically because the outbound message queue is full", 191 }), 192 undeliveredOutboundMessageCount: prometheus.NewCounter(prometheus.CounterOpts{ 193 Namespace: namespaceNetwork, 194 Subsystem: subsystemGossip, 195 Name: prefix + "gossipsub_undelivered_message_total", 196 Help: "number of undelivered messages, i.e., messages that are not delivered to at least one subscriber", 197 }), 198 iHaveRcvCount: prometheus.NewCounter(prometheus.CounterOpts{ 199 Namespace: namespaceNetwork, 200 Subsystem: subsystemGossip, 201 Name: prefix + "gossipsub_ihave_received_total", 202 Help: "number of ihave messages received by the router on rpcs", 203 }), 204 iWantRcvCount: prometheus.NewCounter(prometheus.CounterOpts{ 205 Namespace: namespaceNetwork, 206 Subsystem: subsystemGossip, 207 Name: prefix + "gossipsub_iwant_received_total", 208 Help: "number of iwant messages received by the router on rpcs", 209 }), 210 graftRcvCount: prometheus.NewCounter(prometheus.CounterOpts{ 211 Namespace: namespaceNetwork, 212 Subsystem: subsystemGossip, 213 Name: prefix + "gossipsub_graft_received_total", 214 Help: "number of graft messages received by the router on rpcs", 215 }), 216 pruneRcvCount: prometheus.NewCounter(prometheus.CounterOpts{ 217 Namespace: namespaceNetwork, 218 Subsystem: subsystemGossip, 219 Name: prefix + "gossipsub_prune_received_total", 220 Help: "number of prune messages received by the router on rpcs", 221 }), 222 pubsubMsgRcvCount: prometheus.NewCounter(prometheus.CounterOpts{ 223 Namespace: namespaceNetwork, 224 Subsystem: subsystemGossip, 225 Name: prefix + "gossipsub_pubsub_message_received_total", 226 Help: "number of pubsub messages received by the router", 227 }), 228 iHaveSentCount: prometheus.NewCounter(prometheus.CounterOpts{ 229 Namespace: namespaceNetwork, 230 Subsystem: subsystemGossip, 231 Name: prefix + "gossipsub_ihave_sent_total", 232 Help: "number of ihave messages sent by the router on rpcs", 233 }), 234 iWantSentCount: prometheus.NewCounter(prometheus.CounterOpts{ 235 Namespace: namespaceNetwork, 236 Subsystem: subsystemGossip, 237 Name: prefix + "gossipsub_iwant_sent_total", 238 Help: "number of iwant messages sent by the router on rpcs", 239 }), 240 graftSentCount: prometheus.NewCounter(prometheus.CounterOpts{ 241 Namespace: namespaceNetwork, 242 Subsystem: subsystemGossip, 243 Name: prefix + "gossipsub_graft_sent_total", 244 Help: "number of graft messages sent by the router on rpcs", 245 }), 246 pruneSentCount: prometheus.NewCounter(prometheus.CounterOpts{ 247 Namespace: namespaceNetwork, 248 Subsystem: subsystemGossip, 249 Name: prefix + "gossipsub_prune_sent_total", 250 Help: "number of prune messages sent by the router on rpcs", 251 }), 252 pubsubMsgSentCount: prometheus.NewCounter(prometheus.CounterOpts{ 253 Namespace: namespaceNetwork, 254 Subsystem: subsystemGossip, 255 Name: prefix + "gossipsub_pubsub_message_sent_total", 256 Help: "number of pubsub messages sent by the router", 257 }), 258 } 259 } 260 261 var _ module.LocalGossipSubRouterMetrics = (*LocalGossipSubRouterMetrics)(nil) 262 263 // OnLocalMeshSizeUpdated updates the local mesh size metric. 264 func (g *LocalGossipSubRouterMetrics) OnLocalMeshSizeUpdated(topic string, size int) { 265 g.localMeshSize.WithLabelValues(topic).Set(float64(size)) 266 } 267 268 // OnPeerAddedToProtocol is called when the local node receives a stream from a peer on a gossipsub-related protocol. 269 // Args: 270 // 271 // protocol: the protocol name that the peer is connected to. 272 func (g *LocalGossipSubRouterMetrics) OnPeerAddedToProtocol(protocol string) { 273 g.peerAddedOnProtocolCount.WithLabelValues(protocol).Inc() 274 } 275 276 // OnPeerRemovedFromProtocol is called when the local considers a remote peer blacklisted or unavailable. 277 func (g *LocalGossipSubRouterMetrics) OnPeerRemovedFromProtocol() { 278 g.peerRemovedFromProtocolCount.Inc() 279 } 280 281 // OnLocalPeerJoinedTopic is called when the local node subscribes to a gossipsub topic. 282 // Args: 283 // 284 // topic: the topic that the local peer subscribed to. 285 func (g *LocalGossipSubRouterMetrics) OnLocalPeerJoinedTopic() { 286 g.localPeerJoinedTopicCount.Inc() 287 } 288 289 // OnLocalPeerLeftTopic is called when the local node unsubscribes from a gossipsub topic. 290 // Args: 291 // 292 // topic: the topic that the local peer has unsubscribed from. 293 func (g *LocalGossipSubRouterMetrics) OnLocalPeerLeftTopic() { 294 g.localPeerLeftTopicCount.Inc() 295 } 296 297 // OnPeerGraftTopic is called when the local node receives a GRAFT message from a remote peer on a topic. 298 // Note: the received GRAFT at this point is considered passed the RPC inspection, and is accepted by the local node. 299 func (g *LocalGossipSubRouterMetrics) OnPeerGraftTopic(topic string) { 300 g.peerGraftTopicCount.WithLabelValues(topic).Inc() 301 } 302 303 // OnPeerPruneTopic is called when the local node receives a PRUNE message from a remote peer on a topic. 304 // Note: the received PRUNE at this point is considered passed the RPC inspection, and is accepted by the local node. 305 func (g *LocalGossipSubRouterMetrics) OnPeerPruneTopic(topic string) { 306 g.peerPruneTopicCount.WithLabelValues(topic).Inc() 307 } 308 309 // OnMessageEnteredValidation is called when a received pubsub message enters the validation pipeline. It is the 310 // internal validation pipeline of GossipSub protocol. The message may be rejected or accepted by the validation 311 // pipeline. 312 func (g *LocalGossipSubRouterMetrics) OnMessageEnteredValidation(int) { 313 g.messageEnteredValidationCount.Inc() 314 } 315 316 // OnMessageRejected is called when a received pubsub message is rejected by the validation pipeline. 317 // Args: 318 // 319 // reason: the reason for rejection. 320 // size: the size of the rejected message. 321 func (g *LocalGossipSubRouterMetrics) OnMessageRejected(size int, reason string) { 322 g.messageRejectedSize.WithLabelValues(reason).Observe(float64(size)) 323 } 324 325 // OnMessageDuplicate is called when a received pubsub message is a duplicate of a previously received message, and 326 // is dropped. 327 // Args: 328 // 329 // size: the size of the duplicate message. 330 func (g *LocalGossipSubRouterMetrics) OnMessageDuplicate(size int) { 331 g.messageDuplicateSize.Observe(float64(size)) 332 } 333 334 // OnPeerThrottled is called when a peer is throttled by the local node, i.e., the local node is not accepting any 335 // pubsub message from the peer but may still accept control messages. 336 func (g *LocalGossipSubRouterMetrics) OnPeerThrottled() { 337 g.peerThrottledCount.Inc() 338 } 339 340 // OnRpcReceived is called when an RPC message is received by the local node. The received RPC is considered 341 // passed the RPC inspection, and is accepted by the local node. 342 func (g *LocalGossipSubRouterMetrics) OnRpcReceived(msgCount int, iHaveCount int, iWantCount int, graftCount int, pruneCount int) { 343 g.rpcRcvCount.Inc() 344 g.pubsubMsgRcvCount.Add(float64(msgCount)) 345 g.iHaveRcvCount.Add(float64(iHaveCount)) 346 g.iWantRcvCount.Add(float64(iWantCount)) 347 g.graftRcvCount.Add(float64(graftCount)) 348 g.pruneRcvCount.Add(float64(pruneCount)) 349 } 350 351 // OnRpcSent is called when an RPC message is sent by the local node. 352 // Note: the sent RPC is considered passed the RPC inspection, and is accepted by the local node. 353 func (g *LocalGossipSubRouterMetrics) OnRpcSent(msgCount int, iHaveCount int, iWantCount int, graftCount int, pruneCount int) { 354 g.rpcSentCount.Inc() 355 g.pubsubMsgSentCount.Add(float64(msgCount)) 356 g.iHaveSentCount.Add(float64(iHaveCount)) 357 g.iWantSentCount.Add(float64(iWantCount)) 358 g.graftSentCount.Add(float64(graftCount)) 359 g.pruneSentCount.Add(float64(pruneCount)) 360 } 361 362 // OnOutboundRpcDropped is called when an outbound RPC message is dropped by the local node, typically because the local node 363 // outbound message queue is full; or the RPC is big and the local node cannot fragment it. 364 func (g *LocalGossipSubRouterMetrics) OnOutboundRpcDropped() { 365 g.outboundRpcDroppedCount.Inc() 366 } 367 368 // OnUndeliveredMessage is called when a message is not delivered at least one subscriber of the topic, for example when 369 // the subscriber is too slow to process the message. 370 func (g *LocalGossipSubRouterMetrics) OnUndeliveredMessage() { 371 g.undeliveredOutboundMessageCount.Inc() 372 } 373 374 // OnMessageDeliveredToAllSubscribers is called when a message is delivered to all subscribers of the topic. 375 // Args: 376 // 377 // size: the size of the delivered message. 378 func (g *LocalGossipSubRouterMetrics) OnMessageDeliveredToAllSubscribers(size int) { 379 g.messageDeliveredSize.Observe(float64(size)) 380 }