// Source: github.com/koko1123/flow-go-1@v0.29.6/module/metrics/hotstuff.go

package metrics

import (
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"

	"github.com/koko1123/flow-go-1/model/flow"
)

// HotStuff Metrics
//
// Names of the event types handled by the HotStuff event loop.
// NOTE(review): presumably these are the values passed as the `event` argument
// of HotStuffBusyDuration and HotStuffWaitDuration, which use that argument as
// the value of the `event_type` label — confirm against the callers.
const (
	HotstuffEventTypeTimeout    = "timeout"
	HotstuffEventTypeOnProposal = "onproposal"
	HotstuffEventTypeOnVote     = "onvote"
	HotstuffEventTypeOnQC       = "onqc"
)

// HotstuffCollector implements only the metrics emitted by the HotStuff core logic.
// We have multiple instances of HotStuff running within Flow: Consensus Nodes form
// the main consensus committee. In addition, each Collector node cluster runs its
// own HotStuff instance. Depending on the node role, the name space is different.
// Furthermore, even within the `collection` name space, we need to separate metrics
// between the different clusters. We do this by adding the label `committeeID` to
// the HotStuff metrics and allowing for configurable name space.
//
// NOTE(review): NewHotstuffCollector in this file registers every metric under the
// fixed namespaceConsensus / subsystemHotstuff pair and attaches only the chain ID
// (LabelChain) as a constant label; no `committeeID` label or configurable name
// space is visible here — confirm whether the paragraph above is still current.
27 type HotstuffCollector struct { 28 busyDuration *prometheus.HistogramVec 29 idleDuration prometheus.Histogram 30 waitDuration *prometheus.HistogramVec 31 curView prometheus.Gauge 32 qcView prometheus.Gauge 33 skips prometheus.Counter 34 timeouts prometheus.Counter 35 timeoutDuration prometheus.Gauge 36 committeeComputationsDuration prometheus.Histogram 37 signerComputationsDuration prometheus.Histogram 38 validatorComputationsDuration prometheus.Histogram 39 payloadProductionDuration prometheus.Histogram 40 } 41 42 func NewHotstuffCollector(chain flow.ChainID) *HotstuffCollector { 43 44 hc := &HotstuffCollector{ 45 46 busyDuration: promauto.NewHistogramVec(prometheus.HistogramOpts{ 47 Name: "busy_duration_seconds", 48 Namespace: namespaceConsensus, 49 Subsystem: subsystemHotstuff, 50 Help: "duration [seconds; measured with float64 precision] of how long HotStuff's event loop has been busy processing one event", 51 Buckets: []float64{0.05, 0.2, 0.5, 1, 2, 5}, 52 ConstLabels: prometheus.Labels{LabelChain: chain.String()}, 53 }, []string{"event_type"}), 54 55 idleDuration: promauto.NewHistogram(prometheus.HistogramOpts{ 56 Name: "idle_duration_seconds", 57 Namespace: namespaceConsensus, 58 Subsystem: subsystemHotstuff, 59 Help: "duration [seconds; measured with float64 precision] of how long HotStuff's event loop has been idle without processing any event", 60 Buckets: []float64{0.05, 0.2, 0.5, 1, 2, 5}, 61 ConstLabels: prometheus.Labels{LabelChain: chain.String()}, 62 }), 63 64 waitDuration: promauto.NewHistogramVec(prometheus.HistogramOpts{ 65 Name: "wait_duration_seconds", 66 Namespace: namespaceConsensus, 67 Subsystem: subsystemHotstuff, 68 Help: "duration [seconds; measured with float64 precision] of how long an event has been waited in the HotStuff event loop queue before being processed.", 69 Buckets: []float64{0.05, 0.2, 0.5, 1, 2, 5}, 70 ConstLabels: prometheus.Labels{LabelChain: chain.String()}, 71 }, []string{"event_type"}), 72 73 curView: 
promauto.NewGauge(prometheus.GaugeOpts{ 74 Name: "cur_view", 75 Namespace: namespaceConsensus, 76 Subsystem: subsystemHotstuff, 77 Help: "the current view that the event handler has entered", 78 ConstLabels: prometheus.Labels{LabelChain: chain.String()}, 79 }), 80 81 qcView: promauto.NewGauge(prometheus.GaugeOpts{ 82 Name: "qc_view", 83 Namespace: namespaceConsensus, 84 Subsystem: subsystemHotstuff, 85 Help: "The view of the newest known qc from HotStuff", 86 ConstLabels: prometheus.Labels{LabelChain: chain.String()}, 87 }), 88 89 skips: promauto.NewCounter(prometheus.CounterOpts{ 90 Name: "skips_total", 91 Namespace: namespaceConsensus, 92 Subsystem: subsystemHotstuff, 93 Help: "The number of times we skipped ahead some views", 94 ConstLabels: prometheus.Labels{LabelChain: chain.String()}, 95 }), 96 97 timeouts: promauto.NewCounter(prometheus.CounterOpts{ 98 Name: "timeouts_total", 99 Namespace: namespaceConsensus, 100 Subsystem: subsystemHotstuff, 101 Help: "The number of times we timed out during a view", 102 ConstLabels: prometheus.Labels{LabelChain: chain.String()}, 103 }), 104 105 timeoutDuration: promauto.NewGauge(prometheus.GaugeOpts{ 106 Name: "timeout_seconds", 107 Namespace: namespaceConsensus, 108 Subsystem: subsystemHotstuff, 109 Help: "The current length of the timeout", 110 ConstLabels: prometheus.Labels{LabelChain: chain.String()}, 111 }), 112 113 committeeComputationsDuration: promauto.NewHistogram(prometheus.HistogramOpts{ 114 Name: "committee_computations_seconds", 115 Namespace: namespaceConsensus, 116 Subsystem: subsystemHotstuff, 117 Help: "duration [seconds; measured with float64 precision] of how long HotStuff sends computing consensus committee relations", 118 Buckets: []float64{0.02, 0.05, 0.1, 0.2, 0.5, 1, 2}, 119 ConstLabels: prometheus.Labels{LabelChain: chain.String()}, 120 }), 121 122 signerComputationsDuration: promauto.NewHistogram(prometheus.HistogramOpts{ 123 Name: "crypto_computations_seconds", 124 Namespace: namespaceConsensus, 
125 Subsystem: subsystemHotstuff, 126 Help: "duration [seconds; measured with float64 precision] of how long HotStuff sends with crypto-related operations", 127 Buckets: []float64{0.02, 0.05, 0.1, 0.2, 0.5, 1, 2}, 128 ConstLabels: prometheus.Labels{LabelChain: chain.String()}, 129 }), 130 131 validatorComputationsDuration: promauto.NewHistogram(prometheus.HistogramOpts{ 132 Name: "message_validation_seconds", 133 Namespace: namespaceConsensus, 134 Subsystem: subsystemHotstuff, 135 Help: "duration [seconds; measured with float64 precision] of how long HotStuff sends with message-validation", 136 Buckets: []float64{0.02, 0.05, 0.1, 0.2, 0.5, 1, 2}, 137 ConstLabels: prometheus.Labels{LabelChain: chain.String()}, 138 }), 139 140 payloadProductionDuration: promauto.NewHistogram(prometheus.HistogramOpts{ 141 Name: "payload_production_seconds", 142 Namespace: namespaceConsensus, 143 Subsystem: subsystemHotstuff, 144 Help: "duration [seconds; measured with float64 precision] of how long HotStuff sends with payload production", 145 Buckets: []float64{0.02, 0.05, 0.1, 0.2, 0.5, 1, 2}, 146 ConstLabels: prometheus.Labels{LabelChain: chain.String()}, 147 }), 148 } 149 150 return hc 151 } 152 153 // HotStuffBusyDuration reports Metrics C6 HotStuff Busy Duration 154 func (hc *HotstuffCollector) HotStuffBusyDuration(duration time.Duration, event string) { 155 hc.busyDuration.WithLabelValues(event).Observe(duration.Seconds()) // unit: seconds; with float64 precision 156 } 157 158 // HotStuffIdleDuration reports Metrics C6 HotStuff Idle Duration 159 func (hc *HotstuffCollector) HotStuffIdleDuration(duration time.Duration) { 160 hc.idleDuration.Observe(duration.Seconds()) // unit: seconds; with float64 precision 161 } 162 163 // HotStuffWaitDuration reports Metrics C6 HotStuff Wait Duration 164 func (hc *HotstuffCollector) HotStuffWaitDuration(duration time.Duration, event string) { 165 hc.waitDuration.WithLabelValues(event).Observe(duration.Seconds()) // unit: seconds; with float64 
precision 166 } 167 168 // HotstuffCollector reports Metrics C8: Current View 169 func (hc *HotstuffCollector) SetCurView(view uint64) { 170 hc.curView.Set(float64(view)) 171 } 172 173 // NewestKnownQC reports Metrics C9: View of Newest Known QC 174 func (hc *HotstuffCollector) SetQCView(view uint64) { 175 hc.qcView.Set(float64(view)) 176 } 177 178 // CountSkipped counts the number of skips we did. 179 func (hc *HotstuffCollector) CountSkipped() { 180 hc.skips.Inc() 181 } 182 183 // CountTimeout counts the number of timeouts we had. 184 func (hc *HotstuffCollector) CountTimeout() { 185 hc.timeouts.Inc() 186 } 187 188 // SetTimeout sets the current timeout duration. 189 func (hc *HotstuffCollector) SetTimeout(duration time.Duration) { 190 hc.timeoutDuration.Set(duration.Seconds()) // unit: seconds; with float64 precision 191 } 192 193 // CommitteeProcessingDuration measures the time which the HotStuff's core logic 194 // spends in the hotstuff.Committee component, i.e. the time determining consensus 195 // committee relations. 196 func (hc *HotstuffCollector) CommitteeProcessingDuration(duration time.Duration) { 197 hc.committeeComputationsDuration.Observe(duration.Seconds()) // unit: seconds; with float64 precision 198 } 199 200 // SignerProcessingDuration reports the time which the HotStuff's core logic 201 // spends in the hotstuff.Signer component, i.e. the with crypto-related operations. 202 func (hc *HotstuffCollector) SignerProcessingDuration(duration time.Duration) { 203 hc.signerComputationsDuration.Observe(duration.Seconds()) // unit: seconds; with float64 precision 204 } 205 206 // ValidatorProcessingDuration reports the time which the HotStuff's core logic 207 // spends in the hotstuff.Validator component, i.e. the with verifying higher-level 208 // consensus messages. 
209 func (hc *HotstuffCollector) ValidatorProcessingDuration(duration time.Duration) { 210 hc.validatorComputationsDuration.Observe(duration.Seconds()) // unit: seconds; with float64 precision 211 } 212 213 // PayloadProductionDuration reports the time which the HotStuff's core logic 214 // spends in the module.Builder component, i.e. the with generating block payloads 215 func (hc *HotstuffCollector) PayloadProductionDuration(duration time.Duration) { 216 hc.payloadProductionDuration.Observe(duration.Seconds()) // unit: seconds; with float64 precision 217 }