github.com/koko1123/flow-go-1@v0.29.6/module/metrics/network.go (about) 1 package metrics 2 3 import ( 4 "strconv" 5 "time" 6 7 "github.com/prometheus/client_golang/prometheus" 8 "github.com/prometheus/client_golang/prometheus/promauto" 9 "github.com/rs/zerolog" 10 11 "github.com/koko1123/flow-go-1/module" 12 ) 13 14 const ( 15 _ = iota 16 KiB = 1 << (10 * iota) 17 MiB 18 GiB 19 ) 20 21 type NetworkCollector struct { 22 *LibP2PResourceManagerMetrics 23 *GossipSubMetrics 24 outboundMessageSize *prometheus.HistogramVec 25 inboundMessageSize *prometheus.HistogramVec 26 duplicateMessagesDropped *prometheus.CounterVec 27 queueSize *prometheus.GaugeVec 28 queueDuration *prometheus.HistogramVec 29 numMessagesProcessing *prometheus.GaugeVec 30 numDirectMessagesSending *prometheus.GaugeVec 31 inboundProcessTime *prometheus.CounterVec 32 outboundConnectionCount prometheus.Gauge 33 inboundConnectionCount prometheus.Gauge 34 dnsLookupDuration prometheus.Histogram 35 dnsCacheMissCount prometheus.Counter 36 dnsCacheHitCount prometheus.Counter 37 dnsCacheInvalidationCount prometheus.Counter 38 dnsLookupRequestDroppedCount prometheus.Counter 39 routingTableSize prometheus.Gauge 40 41 // authorization, rate limiting metrics 42 unAuthorizedMessagesCount *prometheus.CounterVec 43 rateLimitedUnicastMessagesCount *prometheus.CounterVec 44 45 prefix string 46 } 47 48 var _ module.NetworkMetrics = (*NetworkCollector)(nil) 49 50 type NetworkCollectorOpt func(*NetworkCollector) 51 52 func WithNetworkPrefix(prefix string) NetworkCollectorOpt { 53 return func(nc *NetworkCollector) { 54 if prefix != "" { 55 nc.prefix = prefix + "_" 56 } 57 } 58 } 59 60 func NewNetworkCollector(logger zerolog.Logger, opts ...NetworkCollectorOpt) *NetworkCollector { 61 nc := &NetworkCollector{} 62 63 for _, opt := range opts { 64 opt(nc) 65 } 66 67 nc.LibP2PResourceManagerMetrics = NewLibP2PResourceManagerMetrics(logger, nc.prefix) 68 nc.GossipSubMetrics = NewGossipSubMetrics(nc.prefix) 69 70 nc.outboundMessageSize = promauto.NewHistogramVec( 71 prometheus.HistogramOpts{ 72 Namespace: namespaceNetwork, 73 Subsystem: subsystemGossip, 74 Name: nc.prefix + "outbound_message_size_bytes", 75 Help: "size of the outbound network message", 76 Buckets: []float64{KiB, 100 * KiB, 500 * KiB, 1 * MiB, 2 * MiB, 4 * MiB}, 77 }, []string{LabelChannel, LabelProtocol, LabelMessage}, 78 ) 79 80 nc.inboundMessageSize = promauto.NewHistogramVec( 81 prometheus.HistogramOpts{ 82 Namespace: namespaceNetwork, 83 Subsystem: subsystemGossip, 84 Name: nc.prefix + "inbound_message_size_bytes", 85 Help: "size of the inbound network message", 86 Buckets: []float64{KiB, 100 * KiB, 500 * KiB, 1 * MiB, 2 * MiB, 4 * MiB}, 87 }, []string{LabelChannel, LabelProtocol, LabelMessage}, 88 ) 89 90 nc.duplicateMessagesDropped = promauto.NewCounterVec( 91 prometheus.CounterOpts{ 92 Namespace: namespaceNetwork, 93 Subsystem: subsystemGossip, 94 Name: nc.prefix + "duplicate_messages_dropped", 95 Help: "number of duplicate messages dropped", 96 }, []string{LabelChannel, LabelProtocol, LabelMessage}, 97 ) 98 99 nc.dnsLookupDuration = promauto.NewHistogram( 100 prometheus.HistogramOpts{ 101 Namespace: namespaceNetwork, 102 Subsystem: subsystemGossip, 103 Name: nc.prefix + "dns_lookup_duration_ms", 104 Buckets: []float64{1, 10, 100, 500, 1000, 2000}, 105 Help: "the time spent on resolving a dns lookup (including cache hits)", 106 }, 107 ) 108 109 nc.dnsCacheMissCount = promauto.NewCounter( 110 prometheus.CounterOpts{ 111 Namespace: namespaceNetwork, 112 Subsystem: subsystemGossip, 113 Name: nc.prefix + "dns_cache_miss_total", 114 Help: "the number of dns lookups that miss the cache and made through network", 115 }, 116 ) 117 118 nc.dnsCacheInvalidationCount = promauto.NewCounter( 119 prometheus.CounterOpts{ 120 Namespace: namespaceNetwork, 121 Subsystem: subsystemGossip, 122 Name: nc.prefix + "dns_cache_invalidation_total", 123 Help: "the number of times dns cache is invalidated for an entry", 124 }, 125 ) 126 127 nc.dnsCacheHitCount = promauto.NewCounter( 128 prometheus.CounterOpts{ 129 Namespace: namespaceNetwork, 130 Subsystem: subsystemGossip, 131 Name: nc.prefix + "dns_cache_hit_total", 132 Help: "the number of dns cache hits", 133 }, 134 ) 135 136 nc.dnsLookupRequestDroppedCount = promauto.NewCounter( 137 prometheus.CounterOpts{ 138 Namespace: namespaceNetwork, 139 Subsystem: subsystemGossip, 140 Name: nc.prefix + "dns_lookup_requests_dropped_total", 141 Help: "the number of dns lookup requests dropped", 142 }, 143 ) 144 145 nc.queueSize = promauto.NewGaugeVec( 146 prometheus.GaugeOpts{ 147 Namespace: namespaceNetwork, 148 Subsystem: subsystemQueue, 149 Name: nc.prefix + "message_queue_size", 150 Help: "the number of elements in the message receive queue", 151 }, []string{LabelPriority}, 152 ) 153 154 nc.queueDuration = promauto.NewHistogramVec( 155 prometheus.HistogramOpts{ 156 Namespace: namespaceNetwork, 157 Subsystem: subsystemQueue, 158 Name: nc.prefix + "message_queue_duration_seconds", 159 Help: "duration [seconds; measured with float64 precision] of how long a message spent in the queue before delivered to an engine.", 160 Buckets: []float64{0.01, 0.1, 0.5, 1, 2, 5}, // 10ms, 100ms, 500ms, 1s, 2s, 5s 161 }, []string{LabelPriority}, 162 ) 163 164 nc.numMessagesProcessing = promauto.NewGaugeVec( 165 prometheus.GaugeOpts{ 166 Namespace: namespaceNetwork, 167 Subsystem: subsystemQueue, 168 Name: nc.prefix + "current_messages_processing", 169 Help: "the number of messages currently being processed", 170 }, []string{LabelChannel}, 171 ) 172 173 nc.numDirectMessagesSending = promauto.NewGaugeVec( 174 prometheus.GaugeOpts{ 175 Namespace: namespaceNetwork, 176 Subsystem: subsystemGossip, 177 Name: nc.prefix + "direct_messages_in_progress", 178 Help: "the number of direct messages currently in the process of sending", 179 }, []string{LabelChannel}, 180 ) 181 182 nc.inboundProcessTime = promauto.NewCounterVec( 183 prometheus.CounterOpts{ 184 Namespace: namespaceNetwork, 185 Subsystem: subsystemQueue, 186 Name: nc.prefix + "engine_message_processing_time_seconds", 187 Help: "duration [seconds; measured with float64 precision] of how long a queue worker blocked for an engine processing message", 188 }, []string{LabelChannel}, 189 ) 190 191 nc.outboundConnectionCount = promauto.NewGauge( 192 prometheus.GaugeOpts{ 193 Namespace: namespaceNetwork, 194 Subsystem: subsystemQueue, 195 Name: nc.prefix + "outbound_connection_count", 196 Help: "the number of outbound connections of this node", 197 }, 198 ) 199 200 nc.inboundConnectionCount = promauto.NewGauge( 201 prometheus.GaugeOpts{ 202 Namespace: namespaceNetwork, 203 Subsystem: subsystemQueue, 204 Name: nc.prefix + "inbound_connection_count", 205 Help: "the number of inbound connections of this node", 206 }, 207 ) 208 209 nc.routingTableSize = promauto.NewGauge( 210 prometheus.GaugeOpts{ 211 Name: nc.prefix + "routing_table_size", 212 Namespace: namespaceNetwork, 213 Subsystem: subsystemDHT, 214 Help: "the size of the DHT routing table", 215 }, 216 ) 217 218 nc.unAuthorizedMessagesCount = promauto.NewCounterVec( 219 prometheus.CounterOpts{ 220 Namespace: namespaceNetwork, 221 Subsystem: subsystemAuth, 222 Name: nc.prefix + "unauthorized_messages_count", 223 Help: "number of messages that failed authorization validation", 224 }, []string{LabelNodeRole, LabelMessage, LabelChannel, LabelViolationReason}, 225 ) 226 227 nc.rateLimitedUnicastMessagesCount = promauto.NewCounterVec( 228 prometheus.CounterOpts{ 229 Namespace: namespaceNetwork, 230 Subsystem: subsystemRateLimiting, 231 Name: nc.prefix + "rate_limited_unicast_messages_count", 232 Help: "number of messages sent via unicast that have been rate limited", 233 }, []string{LabelNodeRole, LabelMessage, LabelChannel, LabelRateLimitReason}, 234 ) 235 236 return nc 237 } 238 239 // OutboundMessageSent collects metrics related to a message sent by the node. 240 func (nc *NetworkCollector) OutboundMessageSent(sizeBytes int, topic, protocol, messageType string) { 241 nc.outboundMessageSize.WithLabelValues(topic, protocol, messageType).Observe(float64(sizeBytes)) 242 } 243 244 // InboundMessageReceived collects metrics related to a message received by the node. 245 func (nc *NetworkCollector) InboundMessageReceived(sizeBytes int, topic, protocol, messageType string) { 246 nc.inboundMessageSize.WithLabelValues(topic, protocol, messageType).Observe(float64(sizeBytes)) 247 } 248 249 // DuplicateInboundMessagesDropped increments the metric tracking the number of duplicate messages dropped by the node. 250 func (nc *NetworkCollector) DuplicateInboundMessagesDropped(topic, protocol, messageType string) { 251 nc.duplicateMessagesDropped.WithLabelValues(topic, protocol, messageType).Add(1) 252 } 253 254 func (nc *NetworkCollector) MessageAdded(priority int) { 255 nc.queueSize.WithLabelValues(strconv.Itoa(priority)).Inc() 256 } 257 258 func (nc *NetworkCollector) MessageRemoved(priority int) { 259 nc.queueSize.WithLabelValues(strconv.Itoa(priority)).Dec() 260 } 261 262 func (nc *NetworkCollector) QueueDuration(duration time.Duration, priority int) { 263 nc.queueDuration.WithLabelValues(strconv.Itoa(priority)).Observe(duration.Seconds()) 264 } 265 266 // MessageProcessingStarted increments the metric tracking the number of messages being processed by the node. 267 func (nc *NetworkCollector) MessageProcessingStarted(topic string) { 268 nc.numMessagesProcessing.WithLabelValues(topic).Inc() 269 } 270 271 // UnicastMessageSendingStarted increments the metric tracking the number of unicast messages sent by the node. 272 func (nc *NetworkCollector) UnicastMessageSendingStarted(topic string) { 273 nc.numDirectMessagesSending.WithLabelValues(topic).Inc() 274 } 275 276 // UnicastMessageSendingCompleted decrements the metric tracking the number of unicast messages sent by the node. 277 func (nc *NetworkCollector) UnicastMessageSendingCompleted(topic string) { 278 nc.numDirectMessagesSending.WithLabelValues(topic).Dec() 279 } 280 281 func (nc *NetworkCollector) RoutingTablePeerAdded() { 282 nc.routingTableSize.Inc() 283 } 284 285 func (nc *NetworkCollector) RoutingTablePeerRemoved() { 286 nc.routingTableSize.Dec() 287 } 288 289 // MessageProcessingFinished tracks the time spent by the node to process a message and decrements the metric tracking 290 // the number of messages being processed by the node. 291 func (nc *NetworkCollector) MessageProcessingFinished(topic string, duration time.Duration) { 292 nc.numMessagesProcessing.WithLabelValues(topic).Dec() 293 nc.inboundProcessTime.WithLabelValues(topic).Add(duration.Seconds()) 294 } 295 296 // OutboundConnections updates the metric tracking the number of outbound connections of this node 297 func (nc *NetworkCollector) OutboundConnections(connectionCount uint) { 298 nc.outboundConnectionCount.Set(float64(connectionCount)) 299 } 300 301 // InboundConnections updates the metric tracking the number of inbound connections of this node 302 func (nc *NetworkCollector) InboundConnections(connectionCount uint) { 303 nc.inboundConnectionCount.Set(float64(connectionCount)) 304 } 305 306 // DNSLookupDuration tracks the time spent to resolve a DNS address. 307 func (nc *NetworkCollector) DNSLookupDuration(duration time.Duration) { 308 nc.dnsLookupDuration.Observe(float64(duration.Milliseconds())) 309 } 310 311 // OnDNSCacheMiss tracks the total number of dns requests resolved through looking up the network. 312 func (nc *NetworkCollector) OnDNSCacheMiss() { 313 nc.dnsCacheMissCount.Inc() 314 } 315 316 // OnDNSCacheInvalidated is called whenever dns cache is invalidated for an entry 317 func (nc *NetworkCollector) OnDNSCacheInvalidated() { 318 nc.dnsCacheInvalidationCount.Inc() 319 } 320 321 // OnDNSCacheHit tracks the total number of dns requests resolved through the cache without 322 // looking up the network. 323 func (nc *NetworkCollector) OnDNSCacheHit() { 324 nc.dnsCacheHitCount.Inc() 325 } 326 327 // OnDNSLookupRequestDropped tracks the number of dns lookup requests that are dropped due to a full queue 328 func (nc *NetworkCollector) OnDNSLookupRequestDropped() { 329 nc.dnsLookupRequestDroppedCount.Inc() 330 } 331 332 // OnUnauthorizedMessage tracks the number of unauthorized messages seen on the network. 333 func (nc *NetworkCollector) OnUnauthorizedMessage(role, msgType, topic, offense string) { 334 nc.unAuthorizedMessagesCount.WithLabelValues(role, msgType, topic, offense).Inc() 335 } 336 337 // OnRateLimitedUnicastMessage tracks the number of rate limited messages seen on the network. 338 func (nc *NetworkCollector) OnRateLimitedUnicastMessage(role, msgType, topic, reason string) { 339 nc.rateLimitedUnicastMessagesCount.WithLabelValues(role, msgType, topic, reason).Inc() 340 }