github.com/koko1123/flow-go-1@v0.29.6/module/metrics/hotstuff.go (about)

     1  package metrics
     2  
     3  import (
     4  	"time"
     5  
     6  	"github.com/prometheus/client_golang/prometheus"
     7  	"github.com/prometheus/client_golang/prometheus/promauto"
     8  
     9  	"github.com/koko1123/flow-go-1/model/flow"
    10  )
    11  
    12  // HotStuff Metrics
    13  const (
    14  	HotstuffEventTypeTimeout    = "timeout"
    15  	HotstuffEventTypeOnProposal = "onproposal"
    16  	HotstuffEventTypeOnVote     = "onvote"
    17  	HotstuffEventTypeOnQC       = "onqc"
    18  )
    19  
    20  // HotstuffCollector implements only the metrics emitted by the HotStuff core logic.
    21  // We have multiple instances of HotStuff running within Flow: Consensus Nodes form
    22  // the main consensus committee. In addition each Collector node cluster runs their
    23  // own HotStuff instance. Depending on the node role, the name space is different. Furthermore,
    24  // even within the `collection` name space, we need to separate metrics between the different
    25  // clusters. We do this by adding the label `committeeID` to the HotStuff metrics and
    26  // allowing for configurable name space.
    27  type HotstuffCollector struct {
    28  	busyDuration                  *prometheus.HistogramVec
    29  	idleDuration                  prometheus.Histogram
    30  	waitDuration                  *prometheus.HistogramVec
    31  	curView                       prometheus.Gauge
    32  	qcView                        prometheus.Gauge
    33  	skips                         prometheus.Counter
    34  	timeouts                      prometheus.Counter
    35  	timeoutDuration               prometheus.Gauge
    36  	committeeComputationsDuration prometheus.Histogram
    37  	signerComputationsDuration    prometheus.Histogram
    38  	validatorComputationsDuration prometheus.Histogram
    39  	payloadProductionDuration     prometheus.Histogram
    40  }
    41  
    42  func NewHotstuffCollector(chain flow.ChainID) *HotstuffCollector {
    43  
    44  	hc := &HotstuffCollector{
    45  
    46  		busyDuration: promauto.NewHistogramVec(prometheus.HistogramOpts{
    47  			Name:        "busy_duration_seconds",
    48  			Namespace:   namespaceConsensus,
    49  			Subsystem:   subsystemHotstuff,
    50  			Help:        "duration [seconds; measured with float64 precision] of how long HotStuff's event loop has been busy processing one event",
    51  			Buckets:     []float64{0.05, 0.2, 0.5, 1, 2, 5},
    52  			ConstLabels: prometheus.Labels{LabelChain: chain.String()},
    53  		}, []string{"event_type"}),
    54  
    55  		idleDuration: promauto.NewHistogram(prometheus.HistogramOpts{
    56  			Name:        "idle_duration_seconds",
    57  			Namespace:   namespaceConsensus,
    58  			Subsystem:   subsystemHotstuff,
    59  			Help:        "duration [seconds; measured with float64 precision] of how long HotStuff's event loop has been idle without processing any event",
    60  			Buckets:     []float64{0.05, 0.2, 0.5, 1, 2, 5},
    61  			ConstLabels: prometheus.Labels{LabelChain: chain.String()},
    62  		}),
    63  
    64  		waitDuration: promauto.NewHistogramVec(prometheus.HistogramOpts{
    65  			Name:        "wait_duration_seconds",
    66  			Namespace:   namespaceConsensus,
    67  			Subsystem:   subsystemHotstuff,
    68  			Help:        "duration [seconds; measured with float64 precision] of how long an event has been waited in the HotStuff event loop queue before being processed.",
    69  			Buckets:     []float64{0.05, 0.2, 0.5, 1, 2, 5},
    70  			ConstLabels: prometheus.Labels{LabelChain: chain.String()},
    71  		}, []string{"event_type"}),
    72  
    73  		curView: promauto.NewGauge(prometheus.GaugeOpts{
    74  			Name:        "cur_view",
    75  			Namespace:   namespaceConsensus,
    76  			Subsystem:   subsystemHotstuff,
    77  			Help:        "the current view that the event handler has entered",
    78  			ConstLabels: prometheus.Labels{LabelChain: chain.String()},
    79  		}),
    80  
    81  		qcView: promauto.NewGauge(prometheus.GaugeOpts{
    82  			Name:        "qc_view",
    83  			Namespace:   namespaceConsensus,
    84  			Subsystem:   subsystemHotstuff,
    85  			Help:        "The view of the newest known qc from HotStuff",
    86  			ConstLabels: prometheus.Labels{LabelChain: chain.String()},
    87  		}),
    88  
    89  		skips: promauto.NewCounter(prometheus.CounterOpts{
    90  			Name:        "skips_total",
    91  			Namespace:   namespaceConsensus,
    92  			Subsystem:   subsystemHotstuff,
    93  			Help:        "The number of times we skipped ahead some views",
    94  			ConstLabels: prometheus.Labels{LabelChain: chain.String()},
    95  		}),
    96  
    97  		timeouts: promauto.NewCounter(prometheus.CounterOpts{
    98  			Name:        "timeouts_total",
    99  			Namespace:   namespaceConsensus,
   100  			Subsystem:   subsystemHotstuff,
   101  			Help:        "The number of times we timed out during a view",
   102  			ConstLabels: prometheus.Labels{LabelChain: chain.String()},
   103  		}),
   104  
   105  		timeoutDuration: promauto.NewGauge(prometheus.GaugeOpts{
   106  			Name:        "timeout_seconds",
   107  			Namespace:   namespaceConsensus,
   108  			Subsystem:   subsystemHotstuff,
   109  			Help:        "The current length of the timeout",
   110  			ConstLabels: prometheus.Labels{LabelChain: chain.String()},
   111  		}),
   112  
   113  		committeeComputationsDuration: promauto.NewHistogram(prometheus.HistogramOpts{
   114  			Name:        "committee_computations_seconds",
   115  			Namespace:   namespaceConsensus,
   116  			Subsystem:   subsystemHotstuff,
   117  			Help:        "duration [seconds; measured with float64 precision] of how long HotStuff sends computing consensus committee relations",
   118  			Buckets:     []float64{0.02, 0.05, 0.1, 0.2, 0.5, 1, 2},
   119  			ConstLabels: prometheus.Labels{LabelChain: chain.String()},
   120  		}),
   121  
   122  		signerComputationsDuration: promauto.NewHistogram(prometheus.HistogramOpts{
   123  			Name:        "crypto_computations_seconds",
   124  			Namespace:   namespaceConsensus,
   125  			Subsystem:   subsystemHotstuff,
   126  			Help:        "duration [seconds; measured with float64 precision] of how long HotStuff sends with crypto-related operations",
   127  			Buckets:     []float64{0.02, 0.05, 0.1, 0.2, 0.5, 1, 2},
   128  			ConstLabels: prometheus.Labels{LabelChain: chain.String()},
   129  		}),
   130  
   131  		validatorComputationsDuration: promauto.NewHistogram(prometheus.HistogramOpts{
   132  			Name:        "message_validation_seconds",
   133  			Namespace:   namespaceConsensus,
   134  			Subsystem:   subsystemHotstuff,
   135  			Help:        "duration [seconds; measured with float64 precision] of how long HotStuff sends with message-validation",
   136  			Buckets:     []float64{0.02, 0.05, 0.1, 0.2, 0.5, 1, 2},
   137  			ConstLabels: prometheus.Labels{LabelChain: chain.String()},
   138  		}),
   139  
   140  		payloadProductionDuration: promauto.NewHistogram(prometheus.HistogramOpts{
   141  			Name:        "payload_production_seconds",
   142  			Namespace:   namespaceConsensus,
   143  			Subsystem:   subsystemHotstuff,
   144  			Help:        "duration [seconds; measured with float64 precision] of how long HotStuff sends with payload production",
   145  			Buckets:     []float64{0.02, 0.05, 0.1, 0.2, 0.5, 1, 2},
   146  			ConstLabels: prometheus.Labels{LabelChain: chain.String()},
   147  		}),
   148  	}
   149  
   150  	return hc
   151  }
   152  
   153  // HotStuffBusyDuration reports Metrics C6 HotStuff Busy Duration
   154  func (hc *HotstuffCollector) HotStuffBusyDuration(duration time.Duration, event string) {
   155  	hc.busyDuration.WithLabelValues(event).Observe(duration.Seconds()) // unit: seconds; with float64 precision
   156  }
   157  
   158  // HotStuffIdleDuration reports Metrics C6 HotStuff Idle Duration
   159  func (hc *HotstuffCollector) HotStuffIdleDuration(duration time.Duration) {
   160  	hc.idleDuration.Observe(duration.Seconds()) // unit: seconds; with float64 precision
   161  }
   162  
   163  // HotStuffWaitDuration reports Metrics C6 HotStuff Wait Duration
   164  func (hc *HotstuffCollector) HotStuffWaitDuration(duration time.Duration, event string) {
   165  	hc.waitDuration.WithLabelValues(event).Observe(duration.Seconds()) // unit: seconds; with float64 precision
   166  }
   167  
   168  // HotstuffCollector reports Metrics C8: Current View
   169  func (hc *HotstuffCollector) SetCurView(view uint64) {
   170  	hc.curView.Set(float64(view))
   171  }
   172  
   173  // NewestKnownQC reports Metrics C9: View of Newest Known QC
   174  func (hc *HotstuffCollector) SetQCView(view uint64) {
   175  	hc.qcView.Set(float64(view))
   176  }
   177  
   178  // CountSkipped counts the number of skips we did.
   179  func (hc *HotstuffCollector) CountSkipped() {
   180  	hc.skips.Inc()
   181  }
   182  
   183  // CountTimeout counts the number of timeouts we had.
   184  func (hc *HotstuffCollector) CountTimeout() {
   185  	hc.timeouts.Inc()
   186  }
   187  
   188  // SetTimeout sets the current timeout duration.
   189  func (hc *HotstuffCollector) SetTimeout(duration time.Duration) {
   190  	hc.timeoutDuration.Set(duration.Seconds()) // unit: seconds; with float64 precision
   191  }
   192  
   193  // CommitteeProcessingDuration measures the time which the HotStuff's core logic
   194  // spends in the hotstuff.Committee component, i.e. the time determining consensus
   195  // committee relations.
   196  func (hc *HotstuffCollector) CommitteeProcessingDuration(duration time.Duration) {
   197  	hc.committeeComputationsDuration.Observe(duration.Seconds()) // unit: seconds; with float64 precision
   198  }
   199  
   200  // SignerProcessingDuration reports the time which the HotStuff's core logic
   201  // spends in the hotstuff.Signer component, i.e. the with crypto-related operations.
   202  func (hc *HotstuffCollector) SignerProcessingDuration(duration time.Duration) {
   203  	hc.signerComputationsDuration.Observe(duration.Seconds()) // unit: seconds; with float64 precision
   204  }
   205  
   206  // ValidatorProcessingDuration reports the time which the HotStuff's core logic
   207  // spends in the hotstuff.Validator component, i.e. the with verifying higher-level
   208  // consensus messages.
   209  func (hc *HotstuffCollector) ValidatorProcessingDuration(duration time.Duration) {
   210  	hc.validatorComputationsDuration.Observe(duration.Seconds()) // unit: seconds; with float64 precision
   211  }
   212  
   213  // PayloadProductionDuration reports the time which the HotStuff's core logic
   214  // spends in the module.Builder component, i.e. the with generating block payloads
   215  func (hc *HotstuffCollector) PayloadProductionDuration(duration time.Duration) {
   216  	hc.payloadProductionDuration.Observe(duration.Seconds()) // unit: seconds; with float64 precision
   217  }