github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/module/metrics/unicast_manager.go (about)

     1  package metrics
     2  
     3  import (
     4  	"time"
     5  
     6  	"github.com/prometheus/client_golang/prometheus"
     7  	"github.com/prometheus/client_golang/prometheus/promauto"
     8  
     9  	"github.com/onflow/flow-go/module"
    10  )
    11  
    12  // UnicastManagerMetrics metrics collector for the unicast manager.
    13  type UnicastManagerMetrics struct {
    14  	// Tracks the number of times a stream creation is retried due to dial-backoff.
    15  	createStreamRetriesDueToDialBackoff *prometheus.HistogramVec
    16  	// Tracks the overall time it takes to create a stream, including dialing the peer and connecting to the peer due to dial-backoff.
    17  	createStreamTimeDueToDialBackoff *prometheus.HistogramVec
    18  	// Tracks the number of retry attempts to dial a peer during stream creation.
    19  	dialPeerRetries *prometheus.HistogramVec
    20  	// Tracks the time it takes to dial a peer and establish a connection during stream creation.
    21  	dialPeerTime *prometheus.HistogramVec
    22  	// Tracks the number of retry attempts to create the stream after peer dialing completes and a connection is established.
    23  	createStreamOnConnRetries *prometheus.HistogramVec
    24  	// Tracks the time it takes to create the stream after peer dialing completes and a connection is established.
    25  	createStreamOnConnTime *prometheus.HistogramVec
    26  	// Tracks the history of the stream retry budget updates.
    27  	streamRetryBudgetUpdates prometheus.Histogram
    28  	// Tracks the history of the dial retry budget updates.
    29  	dialRetryBudgetUpdates prometheus.Histogram
    30  	// Tracks the number of times the dial retry budget is reset to default.
    31  	dialRetryBudgetResetToDefault prometheus.Counter
    32  	// Tracks the number of times the stream creation retry budget is reset to default.
    33  	streamCreationRetryBudgetResetToDefault prometheus.Counter
    34  
    35  	prefix string
    36  }
    37  
    38  var _ module.UnicastManagerMetrics = (*UnicastManagerMetrics)(nil)
    39  
    40  func NewUnicastManagerMetrics(prefix string) *UnicastManagerMetrics {
    41  	uc := &UnicastManagerMetrics{prefix: prefix}
    42  
    43  	uc.createStreamRetriesDueToDialBackoff = promauto.NewHistogramVec(
    44  		prometheus.HistogramOpts{
    45  			Namespace: namespaceNetwork,
    46  			Subsystem: subsystemGossip,
    47  			Name:      uc.prefix + "attempts_to_create_stream_due_to_in_progress_dial_total",
    48  			Help:      "the number of times a stream creation is retried due to a dial in progress",
    49  			Buckets:   []float64{1, 2, 3},
    50  		}, []string{LabelSuccess},
    51  	)
    52  
    53  	uc.createStreamTimeDueToDialBackoff = promauto.NewHistogramVec(
    54  		prometheus.HistogramOpts{
    55  			Namespace: namespaceNetwork,
    56  			Subsystem: subsystemGossip,
    57  			Name:      uc.prefix + "overall_time_to_create_stream_seconds",
    58  			Help:      "the amount of time it takes to create a stream successfully in seconds including the time to create a connection when needed",
    59  			Buckets:   []float64{0.01, 0.1, 0.5, 1, 2, 5},
    60  		}, []string{LabelSuccess},
    61  	)
    62  
    63  	uc.dialPeerRetries = promauto.NewHistogramVec(
    64  		prometheus.HistogramOpts{
    65  			Namespace: namespaceNetwork,
    66  			Subsystem: subsystemGossip,
    67  			Name:      uc.prefix + "attempts_to_dial_peer_total",
    68  			Help:      "number of retry attempts before a connection is established successfully",
    69  			Buckets:   []float64{1, 2, 3},
    70  		}, []string{LabelSuccess},
    71  	)
    72  
    73  	uc.dialPeerTime = promauto.NewHistogramVec(
    74  		prometheus.HistogramOpts{
    75  			Namespace: namespaceNetwork,
    76  			Subsystem: subsystemGossip,
    77  			Name:      uc.prefix + "time_to_dial_peer_seconds",
    78  			Help:      "the amount of time it takes to dial a peer and establish a connection during stream creation",
    79  			Buckets:   []float64{0.01, 0.1, 0.5, 1, 2, 5},
    80  		}, []string{LabelSuccess},
    81  	)
    82  
    83  	uc.createStreamOnConnRetries = promauto.NewHistogramVec(
    84  		prometheus.HistogramOpts{
    85  			Namespace: namespaceNetwork,
    86  			Subsystem: subsystemGossip,
    87  			Name:      uc.prefix + "attempts_to_create_stream_on_connection_total",
    88  			Help:      "number of retry attempts before a stream is created on the available connection between two peers",
    89  			Buckets:   []float64{1, 2, 3},
    90  		}, []string{LabelSuccess},
    91  	)
    92  
    93  	uc.createStreamOnConnTime = promauto.NewHistogramVec(
    94  		prometheus.HistogramOpts{
    95  			Namespace: namespaceNetwork,
    96  			Subsystem: subsystemGossip,
    97  			Name:      uc.prefix + "time_to_create_stream_on_connection_seconds",
    98  			Help:      "the amount of time it takes to create a stream on the available connection between two peers",
    99  			Buckets:   []float64{0.01, 0.1, 0.5, 1, 2, 5},
   100  		}, []string{LabelSuccess},
   101  	)
   102  
   103  	uc.streamRetryBudgetUpdates = prometheus.NewHistogram(
   104  		prometheus.HistogramOpts{
   105  			Namespace: namespaceNetwork,
   106  			Subsystem: subsystemGossip,
   107  			Name:      uc.prefix + "stream_creation_retry_budget",
   108  			Help:      "the history of the stream retry budget updates",
   109  			Buckets:   []float64{1, 2, 3, 4, 5, 10},
   110  		},
   111  	)
   112  
   113  	uc.dialRetryBudgetUpdates = prometheus.NewHistogram(
   114  		prometheus.HistogramOpts{
   115  			Namespace: namespaceNetwork,
   116  			Subsystem: subsystemGossip,
   117  			Name:      uc.prefix + "dial_retry_budget",
   118  			Help:      "the history of the dial retry budget updates",
   119  			Buckets:   []float64{1, 2, 3, 4, 5, 10},
   120  		},
   121  	)
   122  
   123  	uc.streamCreationRetryBudgetResetToDefault = promauto.NewCounter(
   124  		prometheus.CounterOpts{
   125  			Namespace: namespaceNetwork,
   126  			Subsystem: subsystemGossip,
   127  			Name:      uc.prefix + "stream_creation_retry_budget_reset_to_default_total",
   128  			Help:      "the number of times the stream creation retry budget is reset to default by the unicast manager",
   129  		})
   130  
   131  	uc.dialRetryBudgetResetToDefault = promauto.NewCounter(
   132  		prometheus.CounterOpts{
   133  			Namespace: namespaceNetwork,
   134  			Subsystem: subsystemGossip,
   135  			Name:      uc.prefix + "dial_retry_budget_reset_to_default_total",
   136  			Help:      "the number of times the dial retry budget is reset to default by the unicast manager",
   137  		})
   138  
   139  	return uc
   140  }
   141  
   142  // OnStreamCreated tracks the overall time taken to create a stream successfully and the number of retry attempts.
   143  func (u *UnicastManagerMetrics) OnStreamCreated(duration time.Duration, attempts int) {
   144  	u.createStreamRetriesDueToDialBackoff.WithLabelValues("true").Observe(float64(attempts))
   145  	u.createStreamTimeDueToDialBackoff.WithLabelValues("true").Observe(duration.Seconds())
   146  }
   147  
   148  // OnStreamCreationFailure tracks the overall time taken and number of retry attempts used when the unicast manager fails to create a stream.
   149  func (u *UnicastManagerMetrics) OnStreamCreationFailure(duration time.Duration, attempts int) {
   150  	u.createStreamRetriesDueToDialBackoff.WithLabelValues("false").Observe(float64(attempts))
   151  	u.createStreamTimeDueToDialBackoff.WithLabelValues("false").Observe(duration.Seconds())
   152  }
   153  
   154  // OnPeerDialed tracks the time it takes to dial a peer during stream creation and the number of retry attempts before a peer
   155  // is dialed successfully.
   156  func (u *UnicastManagerMetrics) OnPeerDialed(duration time.Duration, attempts int) {
   157  	u.dialPeerRetries.WithLabelValues("true").Observe(float64(attempts))
   158  	u.dialPeerTime.WithLabelValues("true").Observe(duration.Seconds())
   159  }
   160  
   161  // OnPeerDialFailure tracks the amount of time taken and number of retry attempts used when the unicast manager cannot dial a peer
   162  // to establish the initial connection between the two.
   163  func (u *UnicastManagerMetrics) OnPeerDialFailure(duration time.Duration, attempts int) {
   164  	u.dialPeerRetries.WithLabelValues("false").Observe(float64(attempts))
   165  	u.dialPeerTime.WithLabelValues("false").Observe(duration.Seconds())
   166  }
   167  
   168  // OnStreamEstablished tracks the time it takes to create a stream successfully on the available open connection during stream
   169  // creation and the number of retry attempts.
   170  func (u *UnicastManagerMetrics) OnStreamEstablished(duration time.Duration, attempts int) {
   171  	u.createStreamOnConnRetries.WithLabelValues("true").Observe(float64(attempts))
   172  	u.createStreamOnConnTime.WithLabelValues("true").Observe(duration.Seconds())
   173  }
   174  
   175  // OnEstablishStreamFailure tracks the amount of time taken and number of retry attempts used when the unicast manager cannot establish
   176  // a stream on the open connection between two peers.
   177  func (u *UnicastManagerMetrics) OnEstablishStreamFailure(duration time.Duration, attempts int) {
   178  	u.createStreamOnConnRetries.WithLabelValues("false").Observe(float64(attempts))
   179  	u.createStreamOnConnTime.WithLabelValues("false").Observe(duration.Seconds())
   180  }
   181  
   182  // OnStreamCreationRetryBudgetUpdated tracks the history of the stream creation retry budget updates.
   183  func (u *UnicastManagerMetrics) OnStreamCreationRetryBudgetUpdated(budget uint64) {
   184  	u.dialRetryBudgetUpdates.Observe(float64(budget))
   185  }
   186  
   187  // OnDialRetryBudgetUpdated tracks the history of the dial retry budget updates.
   188  func (u *UnicastManagerMetrics) OnDialRetryBudgetUpdated(budget uint64) {
   189  	u.streamRetryBudgetUpdates.Observe(float64(budget))
   190  }
   191  
   192  // OnDialRetryBudgetResetToDefault tracks the number of times the dial retry budget is reset to default.
   193  func (u *UnicastManagerMetrics) OnDialRetryBudgetResetToDefault() {
   194  	u.dialRetryBudgetResetToDefault.Inc()
   195  }
   196  
   197  // OnStreamCreationRetryBudgetResetToDefault tracks the number of times the stream creation retry budget is reset to default.
   198  func (u *UnicastManagerMetrics) OnStreamCreationRetryBudgetResetToDefault() {
   199  	u.streamCreationRetryBudgetResetToDefault.Inc()
   200  }