github.com/celestiaorg/celestia-node@v0.15.0-beta.1/share/p2p/peers/metrics.go (about) 1 package peers 2 3 import ( 4 "context" 5 "fmt" 6 "sync" 7 "time" 8 9 pubsub "github.com/libp2p/go-libp2p-pubsub" 10 "github.com/libp2p/go-libp2p/core/peer" 11 "go.opentelemetry.io/otel" 12 "go.opentelemetry.io/otel/attribute" 13 "go.opentelemetry.io/otel/metric" 14 15 "github.com/celestiaorg/celestia-node/libs/utils" 16 "github.com/celestiaorg/celestia-node/share/p2p/shrexsub" 17 ) 18 19 const ( 20 isInstantKey = "is_instant" 21 doneResultKey = "done_result" 22 23 sourceKey = "source" 24 sourceShrexSub peerSource = "shrexsub" 25 sourceFullNodes peerSource = "full_nodes" 26 27 blacklistPeerReasonKey = "blacklist_reason" 28 reasonInvalidHash blacklistPeerReason = "invalid_hash" 29 reasonMisbehave blacklistPeerReason = "misbehave" 30 31 validationResultKey = "validation_result" 32 validationAccept = "accept" 33 validationReject = "reject" 34 validationIgnore = "ignore" 35 36 peerStatusKey = "peer_status" 37 peerStatusActive peerStatus = "active" 38 peerStatusCooldown peerStatus = "cooldown" 39 40 poolStatusKey = "pool_status" 41 poolStatusCreated poolStatus = "created" 42 poolStatusValidated poolStatus = "validated" 43 poolStatusBlacklisted poolStatus = "blacklisted" 44 // Pool status model: 45 // created(unvalidated) 46 // / \ 47 // validated blacklisted 48 ) 49 50 var meter = otel.Meter("shrex_peer_manager") 51 52 type blacklistPeerReason string 53 54 type peerStatus string 55 56 type poolStatus string 57 58 type peerSource string 59 60 type metrics struct { 61 getPeer metric.Int64Counter // attributes: source, is_instant 62 getPeerWaitTimeHistogram metric.Int64Histogram // attributes: source 63 getPeerPoolSizeHistogram metric.Int64Histogram // attributes: source 64 doneResult metric.Int64Counter // attributes: source, done_result 65 validationResult metric.Int64Counter // attributes: validation_result 66 67 shrexPools metric.Int64ObservableGauge // attributes: pool_status 68 fullNodesPool metric.Int64ObservableGauge // attributes: pool_status 69 blacklistedPeersByReason sync.Map 70 blacklistedPeers metric.Int64ObservableGauge // attributes: blacklist_reason 71 } 72 73 func initMetrics(manager *Manager) (*metrics, error) { 74 getPeer, err := meter.Int64Counter("peer_manager_get_peer_counter", 75 metric.WithDescription("get peer counter")) 76 if err != nil { 77 return nil, err 78 } 79 80 getPeerWaitTimeHistogram, err := meter.Int64Histogram("peer_manager_get_peer_ms_time_hist", 81 metric.WithDescription("get peer time histogram(ms), observed only for async get(is_instant = false)")) 82 if err != nil { 83 return nil, err 84 } 85 86 getPeerPoolSizeHistogram, err := meter.Int64Histogram("peer_manager_get_peer_pool_size_hist", 87 metric.WithDescription("amount of available active peers in pool at time when get was called")) 88 if err != nil { 89 return nil, err 90 } 91 92 doneResult, err := meter.Int64Counter("peer_manager_done_result_counter", 93 metric.WithDescription("done results counter")) 94 if err != nil { 95 return nil, err 96 } 97 98 validationResult, err := meter.Int64Counter("peer_manager_validation_result_counter", 99 metric.WithDescription("validation result counter")) 100 if err != nil { 101 return nil, err 102 } 103 104 shrexPools, err := meter.Int64ObservableGauge("peer_manager_pools_gauge", 105 metric.WithDescription("pools amount")) 106 if err != nil { 107 return nil, err 108 } 109 110 fullNodesPool, err := meter.Int64ObservableGauge("peer_manager_full_nodes_gauge", 111 metric.WithDescription("full nodes pool peers amount")) 112 if err != nil { 113 return nil, err 114 } 115 116 blacklisted, err := meter.Int64ObservableGauge("peer_manager_blacklisted_peers", 117 metric.WithDescription("blacklisted peers amount")) 118 if err != nil { 119 return nil, err 120 } 121 122 metrics := &metrics{ 123 getPeer: getPeer, 124 getPeerWaitTimeHistogram: getPeerWaitTimeHistogram, 125 doneResult: doneResult, 126 validationResult: validationResult, 127 shrexPools: shrexPools, 128 fullNodesPool: fullNodesPool, 129 getPeerPoolSizeHistogram: getPeerPoolSizeHistogram, 130 blacklistedPeers: blacklisted, 131 } 132 133 callback := func(ctx context.Context, observer metric.Observer) error { 134 for poolStatus, count := range manager.shrexPools() { 135 observer.ObserveInt64(shrexPools, count, 136 metric.WithAttributes( 137 attribute.String(poolStatusKey, string(poolStatus)))) 138 } 139 140 observer.ObserveInt64(fullNodesPool, int64(manager.fullNodes.len()), 141 metric.WithAttributes( 142 attribute.String(peerStatusKey, string(peerStatusActive)))) 143 observer.ObserveInt64(fullNodesPool, int64(manager.fullNodes.cooldown.len()), 144 metric.WithAttributes( 145 attribute.String(peerStatusKey, string(peerStatusCooldown)))) 146 147 metrics.blacklistedPeersByReason.Range(func(key, value any) bool { 148 reason := key.(blacklistPeerReason) 149 amount := value.(int) 150 observer.ObserveInt64(blacklisted, int64(amount), 151 metric.WithAttributes( 152 attribute.String(blacklistPeerReasonKey, string(reason)))) 153 return true 154 }) 155 return nil 156 } 157 _, err = meter.RegisterCallback(callback, shrexPools, fullNodesPool, blacklisted) 158 if err != nil { 159 return nil, fmt.Errorf("registering metrics callback: %w", err) 160 } 161 return metrics, nil 162 } 163 164 func (m *metrics) observeGetPeer( 165 ctx context.Context, 166 source peerSource, poolSize int, waitTime time.Duration, 167 ) { 168 if m == nil { 169 return 170 } 171 ctx = utils.ResetContextOnError(ctx) 172 m.getPeer.Add(ctx, 1, 173 metric.WithAttributes( 174 attribute.String(sourceKey, string(source)), 175 attribute.Bool(isInstantKey, waitTime == 0))) 176 if source == sourceShrexSub { 177 m.getPeerPoolSizeHistogram.Record(ctx, int64(poolSize), 178 metric.WithAttributes( 179 attribute.String(sourceKey, string(source)))) 180 } 181 182 // record wait time only for async gets 183 if waitTime > 0 { 184 m.getPeerWaitTimeHistogram.Record(ctx, waitTime.Milliseconds(), 185 metric.WithAttributes( 186 attribute.String(sourceKey, string(source)))) 187 } 188 } 189 190 func (m *metrics) observeDoneResult(source peerSource, result result) { 191 if m == nil { 192 return 193 } 194 195 ctx := context.Background() 196 m.doneResult.Add(ctx, 1, 197 metric.WithAttributes( 198 attribute.String(sourceKey, string(source)), 199 attribute.String(doneResultKey, string(result)))) 200 } 201 202 // validationObserver is a middleware that observes validation results as metrics 203 func (m *metrics) validationObserver(validator shrexsub.ValidatorFn) shrexsub.ValidatorFn { 204 if m == nil { 205 return validator 206 } 207 return func(ctx context.Context, id peer.ID, n shrexsub.Notification) pubsub.ValidationResult { 208 res := validator(ctx, id, n) 209 210 var resStr string 211 switch res { 212 case pubsub.ValidationAccept: 213 resStr = validationAccept 214 case pubsub.ValidationReject: 215 resStr = validationReject 216 case pubsub.ValidationIgnore: 217 resStr = validationIgnore 218 default: 219 resStr = "unknown" 220 } 221 222 ctx = utils.ResetContextOnError(ctx) 223 224 m.validationResult.Add(ctx, 1, 225 metric.WithAttributes( 226 attribute.String(validationResultKey, resStr))) 227 return res 228 } 229 } 230 231 // observeBlacklistPeers stores amount of blacklisted peers by reason 232 func (m *metrics) observeBlacklistPeers(reason blacklistPeerReason, amount int) { 233 if m == nil { 234 return 235 } 236 for { 237 prevVal, loaded := m.blacklistedPeersByReason.LoadOrStore(reason, amount) 238 if !loaded { 239 return 240 } 241 242 newVal := prevVal.(int) + amount 243 if m.blacklistedPeersByReason.CompareAndSwap(reason, prevVal, newVal) { 244 return 245 } 246 } 247 } 248 249 // shrexPools collects amount of shrex pools by poolStatus 250 func (m *Manager) shrexPools() map[poolStatus]int64 { 251 m.lock.Lock() 252 defer m.lock.Unlock() 253 254 shrexPools := make(map[poolStatus]int64) 255 for _, p := range m.pools { 256 if !p.isValidatedDataHash.Load() { 257 shrexPools[poolStatusCreated]++ 258 continue 259 } 260 261 // pool is validated but not synced 262 shrexPools[poolStatusValidated]++ 263 } 264 265 shrexPools[poolStatusBlacklisted] = int64(len(m.blacklistedHashes)) 266 return shrexPools 267 }