github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/network/p2p/tracer/gossipSubScoreTracer.go (about) 1 package tracer 2 3 import ( 4 "sync" 5 "time" 6 7 "github.com/libp2p/go-libp2p/core/peer" 8 "github.com/rs/zerolog" 9 10 "github.com/onflow/flow-go/module" 11 "github.com/onflow/flow-go/module/component" 12 "github.com/onflow/flow-go/module/irrecoverable" 13 "github.com/onflow/flow-go/network/channels" 14 "github.com/onflow/flow-go/network/p2p" 15 p2plogging "github.com/onflow/flow-go/network/p2p/logging" 16 "github.com/onflow/flow-go/utils/logging" 17 ) 18 19 const ( 20 PeerScoreLogMessage = "peer score snapshot update" 21 ) 22 23 // GossipSubScoreTracer is a tracer that keeps track of the peer scores of the gossipsub router. 24 // It is used to log the peer scores at regular intervals. 25 type GossipSubScoreTracer struct { 26 component.Component 27 28 updateInterval time.Duration // interval at which it is expecting to receive updates from the gossipsub router 29 logger zerolog.Logger 30 collector module.GossipSubScoringMetrics 31 32 snapshotUpdate chan struct{} // a channel to notify the snapshot update. 33 snapshotLock sync.RWMutex 34 snapshot map[peer.ID]*p2p.PeerScoreSnapshot 35 snapShotUpdateReq chan map[peer.ID]*p2p.PeerScoreSnapshot 36 idProvider module.IdentityProvider 37 } 38 39 var _ p2p.PeerScoreTracer = (*GossipSubScoreTracer)(nil) 40 41 func NewGossipSubScoreTracer( 42 logger zerolog.Logger, 43 provider module.IdentityProvider, 44 collector module.GossipSubScoringMetrics, 45 updateInterval time.Duration) *GossipSubScoreTracer { 46 g := &GossipSubScoreTracer{ 47 logger: logger.With().Str("component", "gossipsub_score_tracer").Logger(), 48 updateInterval: updateInterval, 49 collector: collector, 50 snapshotUpdate: make(chan struct{}, 1), 51 snapShotUpdateReq: make(chan map[peer.ID]*p2p.PeerScoreSnapshot, 1), 52 snapshot: make(map[peer.ID]*p2p.PeerScoreSnapshot), 53 idProvider: provider, 54 } 55 56 g.Component = component.NewComponentManagerBuilder(). 57 AddWorker(func(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { 58 ready() 59 g.logLoop(ctx) 60 }). 61 Build() 62 63 return g 64 } 65 66 // UpdatePeerScoreSnapshots updates the tracer's snapshot of the peer scores. It is called by the gossipsub router. 67 // It is non-blocking and asynchrounous. If there is no update pending, it queues an update. If there is an update pending, 68 // it drops the update. 69 func (g *GossipSubScoreTracer) UpdatePeerScoreSnapshots(snapshot map[peer.ID]*p2p.PeerScoreSnapshot) { 70 select { 71 case g.snapShotUpdateReq <- snapshot: 72 default: 73 // if the channel is full, we drop the update. This should rarely happen as the log loop should be able to keep up. 74 // if it does happen, it means that the log loop is not running or is blocked. In this case, we don't want to block 75 // the main thread. 76 g.logger.Warn().Msg("dropping peer score snapshot update, channel full") 77 } 78 } 79 80 // UpdateInterval returns the interval at which the tracer expects to receive updates from the gossipsub router. 81 func (g *GossipSubScoreTracer) UpdateInterval() time.Duration { 82 return g.updateInterval 83 } 84 85 // GetScore returns the overall score for the given peer. 86 func (g *GossipSubScoreTracer) GetScore(peerID peer.ID) (float64, bool) { 87 g.snapshotLock.RLock() 88 defer g.snapshotLock.RUnlock() 89 90 if snapshot, ok := g.snapshot[peerID]; ok { 91 return snapshot.Score, true 92 } 93 94 return 0, false 95 } 96 97 // GetAppScore returns the application score for the given peer. 98 func (g *GossipSubScoreTracer) GetAppScore(peerID peer.ID) (float64, bool) { 99 g.snapshotLock.RLock() 100 defer g.snapshotLock.RUnlock() 101 102 if snapshot, ok := g.snapshot[peerID]; ok { 103 return snapshot.AppSpecificScore, true 104 } 105 106 return 0, false 107 } 108 109 // GetIPColocationFactor returns the IP colocation factor for the given peer. 110 func (g *GossipSubScoreTracer) GetIPColocationFactor(peerID peer.ID) (float64, bool) { 111 g.snapshotLock.RLock() 112 defer g.snapshotLock.RUnlock() 113 114 if snapshot, ok := g.snapshot[peerID]; ok { 115 return snapshot.IPColocationFactor, true 116 } 117 118 return 0, false 119 } 120 121 // GetBehaviourPenalty returns the behaviour penalty for the given peer. 122 func (g *GossipSubScoreTracer) GetBehaviourPenalty(peerID peer.ID) (float64, bool) { 123 g.snapshotLock.RLock() 124 defer g.snapshotLock.RUnlock() 125 126 if snapshot, ok := g.snapshot[peerID]; ok { 127 return snapshot.BehaviourPenalty, true 128 } 129 130 return 0, false 131 } 132 133 // GetTopicScores returns the topic scores for the given peer. 134 // The returned map is keyed by topic name. 135 func (g *GossipSubScoreTracer) GetTopicScores(peerID peer.ID) (map[string]p2p.TopicScoreSnapshot, bool) { 136 g.snapshotLock.RLock() 137 defer g.snapshotLock.RUnlock() 138 139 snapshot, ok := g.snapshot[peerID] 140 if !ok { 141 return nil, false 142 } 143 144 topicsSnapshot := make(map[string]p2p.TopicScoreSnapshot) 145 // copy the topic scores into a new map 146 for topic, topicSnapshot := range snapshot.Topics { 147 topicsSnapshot[topic] = *topicSnapshot 148 } 149 150 return topicsSnapshot, true 151 } 152 153 func (g *GossipSubScoreTracer) logLoop(ctx irrecoverable.SignalerContext) { 154 g.logger.Debug().Msg("starting log loop") 155 for { 156 select { 157 case <-ctx.Done(): 158 g.logger.Debug().Msg("stopping log loop") 159 return 160 default: 161 } 162 163 select { 164 case <-ctx.Done(): 165 g.logger.Debug().Msg("stopping log loop") 166 return 167 case snapshot := <-g.snapShotUpdateReq: 168 g.logger.Debug().Msg("received snapshot update") 169 g.updateSnapshot(snapshot) 170 g.logger.Debug().Msg("snapshot updated") 171 g.logPeerScores() 172 g.logger.Debug().Msg("peer scores logged") 173 } 174 } 175 } 176 177 // updateSnapshot updates the tracer's snapshot of the peer scores. 178 // It is called by the log loop, it is a blocking and synchronous call. 179 func (g *GossipSubScoreTracer) updateSnapshot(snapshot map[peer.ID]*p2p.PeerScoreSnapshot) { 180 g.snapshotLock.Lock() 181 defer g.snapshotLock.Unlock() 182 183 g.snapshot = snapshot 184 } 185 186 // logPeerScores logs the peer score snapshots for all peers. 187 func (g *GossipSubScoreTracer) logPeerScores() { 188 g.snapshotLock.RLock() 189 defer g.snapshotLock.RUnlock() 190 191 g.logger.Debug().Msg("logging peer scores") 192 warningStateCount := uint(0) 193 194 for peerID := range g.snapshot { 195 warning := g.logPeerScore(peerID) 196 if warning { 197 warningStateCount++ 198 } 199 } 200 201 g.collector.SetWarningStateCount(warningStateCount) 202 g.logger.Debug().Msg("finished logging peer scores") 203 } 204 205 // logPeerScore logs the peer score snapshot for the given peer. 206 // It also updates the score-related metrics. 207 // The return value indicates whether the peer score is in the warning state. 208 // Note: this function is not thread-safe and should be called with the lock held. 209 func (g *GossipSubScoreTracer) logPeerScore(peerID peer.ID) bool { 210 snapshot, ok := g.snapshot[peerID] 211 if !ok { 212 return false 213 } 214 215 var lg zerolog.Logger 216 217 identity, valid := g.idProvider.ByPeerID(peerID) 218 if !valid { 219 lg = g.logger.With(). 220 Str("flow_id", "unknown"). 221 Str("role", "unknown").Logger() 222 } else { 223 lg = g.logger.With(). 224 Hex("flow_id", logging.ID(identity.NodeID)). 225 Str("role", identity.Role.String()).Logger() 226 } 227 228 lg = lg.With(). 229 Str("peer_id", p2plogging.PeerId(peerID)). 230 Float64("overall_score", snapshot.Score). 231 Float64("app_specific_score", snapshot.AppSpecificScore). 232 Float64("ip_colocation_factor", snapshot.IPColocationFactor). 233 Float64("behaviour_penalty", snapshot.BehaviourPenalty).Logger() 234 235 g.collector.OnOverallPeerScoreUpdated(snapshot.Score) 236 g.collector.OnAppSpecificScoreUpdated(snapshot.AppSpecificScore) 237 g.collector.OnIPColocationFactorUpdated(snapshot.IPColocationFactor) 238 g.collector.OnBehaviourPenaltyUpdated(snapshot.BehaviourPenalty) 239 240 for topic, topicSnapshot := range snapshot.Topics { 241 lg = lg.With(). 242 Str("topic", topic). 243 Dur("time_in_mesh", topicSnapshot.TimeInMesh). 244 Float64("first_message_deliveries", topicSnapshot.FirstMessageDeliveries). 245 Float64("mesh_message_deliveries", topicSnapshot.MeshMessageDeliveries). 246 Float64("invalid_messages", topicSnapshot.InvalidMessageDeliveries).Logger() 247 248 g.collector.OnFirstMessageDeliveredUpdated(channels.Topic(topic), topicSnapshot.FirstMessageDeliveries) 249 g.collector.OnMeshMessageDeliveredUpdated(channels.Topic(topic), topicSnapshot.MeshMessageDeliveries) 250 g.collector.OnInvalidMessageDeliveredUpdated(channels.Topic(topic), topicSnapshot.InvalidMessageDeliveries) 251 g.collector.OnTimeInMeshUpdated(channels.Topic(topic), topicSnapshot.TimeInMesh) 252 } 253 254 if snapshot.IsWarning() { 255 lg.Warn().Msg(PeerScoreLogMessage) 256 return true 257 } 258 259 lg.Debug().Msg(PeerScoreLogMessage) 260 return false 261 }