github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/network/p2p/tracer/gossipSubScoreTracer.go (about)

     1  package tracer
     2  
     3  import (
     4  	"sync"
     5  	"time"
     6  
     7  	"github.com/libp2p/go-libp2p/core/peer"
     8  	"github.com/rs/zerolog"
     9  
    10  	"github.com/onflow/flow-go/module"
    11  	"github.com/onflow/flow-go/module/component"
    12  	"github.com/onflow/flow-go/module/irrecoverable"
    13  	"github.com/onflow/flow-go/network/channels"
    14  	"github.com/onflow/flow-go/network/p2p"
    15  	p2plogging "github.com/onflow/flow-go/network/p2p/logging"
    16  	"github.com/onflow/flow-go/utils/logging"
    17  )
    18  
    19  const (
    20  	PeerScoreLogMessage = "peer score snapshot update"
    21  )
    22  
    23  // GossipSubScoreTracer is a tracer that keeps track of the peer scores of the gossipsub router.
    24  // It is used to log the peer scores at regular intervals.
    25  type GossipSubScoreTracer struct {
    26  	component.Component
    27  
    28  	updateInterval time.Duration // interval at which it is expecting to receive updates from the gossipsub router
    29  	logger         zerolog.Logger
    30  	collector      module.GossipSubScoringMetrics
    31  
    32  	snapshotUpdate    chan struct{} // a channel to notify the snapshot update.
    33  	snapshotLock      sync.RWMutex
    34  	snapshot          map[peer.ID]*p2p.PeerScoreSnapshot
    35  	snapShotUpdateReq chan map[peer.ID]*p2p.PeerScoreSnapshot
    36  	idProvider        module.IdentityProvider
    37  }
    38  
    39  var _ p2p.PeerScoreTracer = (*GossipSubScoreTracer)(nil)
    40  
    41  func NewGossipSubScoreTracer(
    42  	logger zerolog.Logger,
    43  	provider module.IdentityProvider,
    44  	collector module.GossipSubScoringMetrics,
    45  	updateInterval time.Duration) *GossipSubScoreTracer {
    46  	g := &GossipSubScoreTracer{
    47  		logger:            logger.With().Str("component", "gossipsub_score_tracer").Logger(),
    48  		updateInterval:    updateInterval,
    49  		collector:         collector,
    50  		snapshotUpdate:    make(chan struct{}, 1),
    51  		snapShotUpdateReq: make(chan map[peer.ID]*p2p.PeerScoreSnapshot, 1),
    52  		snapshot:          make(map[peer.ID]*p2p.PeerScoreSnapshot),
    53  		idProvider:        provider,
    54  	}
    55  
    56  	g.Component = component.NewComponentManagerBuilder().
    57  		AddWorker(func(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) {
    58  			ready()
    59  			g.logLoop(ctx)
    60  		}).
    61  		Build()
    62  
    63  	return g
    64  }
    65  
    66  // UpdatePeerScoreSnapshots updates the tracer's snapshot of the peer scores. It is called by the gossipsub router.
    67  // It is non-blocking and asynchrounous. If there is no update pending, it queues an update. If there is an update pending,
    68  // it drops the update.
    69  func (g *GossipSubScoreTracer) UpdatePeerScoreSnapshots(snapshot map[peer.ID]*p2p.PeerScoreSnapshot) {
    70  	select {
    71  	case g.snapShotUpdateReq <- snapshot:
    72  	default:
    73  		// if the channel is full, we drop the update. This should rarely happen as the log loop should be able to keep up.
    74  		// if it does happen, it means that the log loop is not running or is blocked. In this case, we don't want to block
    75  		// the main thread.
    76  		g.logger.Warn().Msg("dropping peer score snapshot update, channel full")
    77  	}
    78  }
    79  
    80  // UpdateInterval returns the interval at which the tracer expects to receive updates from the gossipsub router.
    81  func (g *GossipSubScoreTracer) UpdateInterval() time.Duration {
    82  	return g.updateInterval
    83  }
    84  
    85  // GetScore returns the overall score for the given peer.
    86  func (g *GossipSubScoreTracer) GetScore(peerID peer.ID) (float64, bool) {
    87  	g.snapshotLock.RLock()
    88  	defer g.snapshotLock.RUnlock()
    89  
    90  	if snapshot, ok := g.snapshot[peerID]; ok {
    91  		return snapshot.Score, true
    92  	}
    93  
    94  	return 0, false
    95  }
    96  
    97  // GetAppScore returns the application score for the given peer.
    98  func (g *GossipSubScoreTracer) GetAppScore(peerID peer.ID) (float64, bool) {
    99  	g.snapshotLock.RLock()
   100  	defer g.snapshotLock.RUnlock()
   101  
   102  	if snapshot, ok := g.snapshot[peerID]; ok {
   103  		return snapshot.AppSpecificScore, true
   104  	}
   105  
   106  	return 0, false
   107  }
   108  
   109  // GetIPColocationFactor returns the IP colocation factor for the given peer.
   110  func (g *GossipSubScoreTracer) GetIPColocationFactor(peerID peer.ID) (float64, bool) {
   111  	g.snapshotLock.RLock()
   112  	defer g.snapshotLock.RUnlock()
   113  
   114  	if snapshot, ok := g.snapshot[peerID]; ok {
   115  		return snapshot.IPColocationFactor, true
   116  	}
   117  
   118  	return 0, false
   119  }
   120  
   121  // GetBehaviourPenalty returns the behaviour penalty for the given peer.
   122  func (g *GossipSubScoreTracer) GetBehaviourPenalty(peerID peer.ID) (float64, bool) {
   123  	g.snapshotLock.RLock()
   124  	defer g.snapshotLock.RUnlock()
   125  
   126  	if snapshot, ok := g.snapshot[peerID]; ok {
   127  		return snapshot.BehaviourPenalty, true
   128  	}
   129  
   130  	return 0, false
   131  }
   132  
   133  // GetTopicScores returns the topic scores for the given peer.
   134  // The returned map is keyed by topic name.
   135  func (g *GossipSubScoreTracer) GetTopicScores(peerID peer.ID) (map[string]p2p.TopicScoreSnapshot, bool) {
   136  	g.snapshotLock.RLock()
   137  	defer g.snapshotLock.RUnlock()
   138  
   139  	snapshot, ok := g.snapshot[peerID]
   140  	if !ok {
   141  		return nil, false
   142  	}
   143  
   144  	topicsSnapshot := make(map[string]p2p.TopicScoreSnapshot)
   145  	// copy the topic scores into a new map
   146  	for topic, topicSnapshot := range snapshot.Topics {
   147  		topicsSnapshot[topic] = *topicSnapshot
   148  	}
   149  
   150  	return topicsSnapshot, true
   151  }
   152  
   153  func (g *GossipSubScoreTracer) logLoop(ctx irrecoverable.SignalerContext) {
   154  	g.logger.Debug().Msg("starting log loop")
   155  	for {
   156  		select {
   157  		case <-ctx.Done():
   158  			g.logger.Debug().Msg("stopping log loop")
   159  			return
   160  		default:
   161  		}
   162  
   163  		select {
   164  		case <-ctx.Done():
   165  			g.logger.Debug().Msg("stopping log loop")
   166  			return
   167  		case snapshot := <-g.snapShotUpdateReq:
   168  			g.logger.Debug().Msg("received snapshot update")
   169  			g.updateSnapshot(snapshot)
   170  			g.logger.Debug().Msg("snapshot updated")
   171  			g.logPeerScores()
   172  			g.logger.Debug().Msg("peer scores logged")
   173  		}
   174  	}
   175  }
   176  
   177  // updateSnapshot updates the tracer's snapshot of the peer scores.
   178  // It is called by the log loop, it is a blocking and synchronous call.
   179  func (g *GossipSubScoreTracer) updateSnapshot(snapshot map[peer.ID]*p2p.PeerScoreSnapshot) {
   180  	g.snapshotLock.Lock()
   181  	defer g.snapshotLock.Unlock()
   182  
   183  	g.snapshot = snapshot
   184  }
   185  
   186  // logPeerScores logs the peer score snapshots for all peers.
   187  func (g *GossipSubScoreTracer) logPeerScores() {
   188  	g.snapshotLock.RLock()
   189  	defer g.snapshotLock.RUnlock()
   190  
   191  	g.logger.Debug().Msg("logging peer scores")
   192  	warningStateCount := uint(0)
   193  
   194  	for peerID := range g.snapshot {
   195  		warning := g.logPeerScore(peerID)
   196  		if warning {
   197  			warningStateCount++
   198  		}
   199  	}
   200  
   201  	g.collector.SetWarningStateCount(warningStateCount)
   202  	g.logger.Debug().Msg("finished logging peer scores")
   203  }
   204  
   205  // logPeerScore logs the peer score snapshot for the given peer.
   206  // It also updates the score-related metrics.
   207  // The return value indicates whether the peer score is in the warning state.
   208  // Note: this function is not thread-safe and should be called with the lock held.
   209  func (g *GossipSubScoreTracer) logPeerScore(peerID peer.ID) bool {
   210  	snapshot, ok := g.snapshot[peerID]
   211  	if !ok {
   212  		return false
   213  	}
   214  
   215  	var lg zerolog.Logger
   216  
   217  	identity, valid := g.idProvider.ByPeerID(peerID)
   218  	if !valid {
   219  		lg = g.logger.With().
   220  			Str("flow_id", "unknown").
   221  			Str("role", "unknown").Logger()
   222  	} else {
   223  		lg = g.logger.With().
   224  			Hex("flow_id", logging.ID(identity.NodeID)).
   225  			Str("role", identity.Role.String()).Logger()
   226  	}
   227  
   228  	lg = lg.With().
   229  		Str("peer_id", p2plogging.PeerId(peerID)).
   230  		Float64("overall_score", snapshot.Score).
   231  		Float64("app_specific_score", snapshot.AppSpecificScore).
   232  		Float64("ip_colocation_factor", snapshot.IPColocationFactor).
   233  		Float64("behaviour_penalty", snapshot.BehaviourPenalty).Logger()
   234  
   235  	g.collector.OnOverallPeerScoreUpdated(snapshot.Score)
   236  	g.collector.OnAppSpecificScoreUpdated(snapshot.AppSpecificScore)
   237  	g.collector.OnIPColocationFactorUpdated(snapshot.IPColocationFactor)
   238  	g.collector.OnBehaviourPenaltyUpdated(snapshot.BehaviourPenalty)
   239  
   240  	for topic, topicSnapshot := range snapshot.Topics {
   241  		lg = lg.With().
   242  			Str("topic", topic).
   243  			Dur("time_in_mesh", topicSnapshot.TimeInMesh).
   244  			Float64("first_message_deliveries", topicSnapshot.FirstMessageDeliveries).
   245  			Float64("mesh_message_deliveries", topicSnapshot.MeshMessageDeliveries).
   246  			Float64("invalid_messages", topicSnapshot.InvalidMessageDeliveries).Logger()
   247  
   248  		g.collector.OnFirstMessageDeliveredUpdated(channels.Topic(topic), topicSnapshot.FirstMessageDeliveries)
   249  		g.collector.OnMeshMessageDeliveredUpdated(channels.Topic(topic), topicSnapshot.MeshMessageDeliveries)
   250  		g.collector.OnInvalidMessageDeliveredUpdated(channels.Topic(topic), topicSnapshot.InvalidMessageDeliveries)
   251  		g.collector.OnTimeInMeshUpdated(channels.Topic(topic), topicSnapshot.TimeInMesh)
   252  	}
   253  
   254  	if snapshot.IsWarning() {
   255  		lg.Warn().Msg(PeerScoreLogMessage)
   256  		return true
   257  	}
   258  
   259  	lg.Debug().Msg(PeerScoreLogMessage)
   260  	return false
   261  }