github.com/MetalBlockchain/metalgo@v1.11.9/snow/consensus/snowman/metrics.go (about) 1 // Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. 2 // See the file LICENSE for licensing terms. 3 4 package snowman 5 6 import ( 7 "time" 8 9 "github.com/prometheus/client_golang/prometheus" 10 "go.uber.org/zap" 11 12 "github.com/MetalBlockchain/metalgo/ids" 13 "github.com/MetalBlockchain/metalgo/snow/choices" 14 "github.com/MetalBlockchain/metalgo/utils/linked" 15 "github.com/MetalBlockchain/metalgo/utils/logging" 16 "github.com/MetalBlockchain/metalgo/utils/metric" 17 "github.com/MetalBlockchain/metalgo/utils/wrappers" 18 ) 19 20 type processingStart struct { 21 time time.Time 22 pollNumber uint64 23 } 24 25 type metrics struct { 26 log logging.Logger 27 28 currentMaxVerifiedHeight uint64 29 maxVerifiedHeight prometheus.Gauge 30 31 lastAcceptedHeight prometheus.Gauge 32 lastAcceptedTimestamp prometheus.Gauge 33 34 // processingBlocks keeps track of the [processingStart] that each block was 35 // issued into the consensus instance. This is used to calculate the amount 36 // of time to accept or reject the block. 37 processingBlocks *linked.Hashmap[ids.ID, processingStart] 38 39 // numProcessing keeps track of the number of processing blocks 40 numProcessing prometheus.Gauge 41 42 blockSizeAcceptedSum prometheus.Gauge 43 // pollsAccepted tracks the number of polls that a block was in processing 44 // for before being accepted 45 pollsAccepted metric.Averager 46 // latAccepted tracks the number of nanoseconds that a block was processing 47 // before being accepted 48 latAccepted metric.Averager 49 buildLatencyAccepted prometheus.Gauge 50 51 blockSizeRejectedSum prometheus.Gauge 52 // pollsRejected tracks the number of polls that a block was in processing 53 // for before being rejected 54 pollsRejected metric.Averager 55 // latRejected tracks the number of nanoseconds that a block was processing 56 // before being rejected 57 latRejected metric.Averager 58 59 // numFailedPolls keeps track of the number of polls that failed 60 numFailedPolls prometheus.Counter 61 62 // numSuccessfulPolls keeps track of the number of polls that succeeded 63 numSuccessfulPolls prometheus.Counter 64 } 65 66 func newMetrics( 67 log logging.Logger, 68 reg prometheus.Registerer, 69 lastAcceptedHeight uint64, 70 lastAcceptedTime time.Time, 71 ) (*metrics, error) { 72 errs := wrappers.Errs{} 73 m := &metrics{ 74 log: log, 75 currentMaxVerifiedHeight: lastAcceptedHeight, 76 maxVerifiedHeight: prometheus.NewGauge(prometheus.GaugeOpts{ 77 Name: "max_verified_height", 78 Help: "highest verified height", 79 }), 80 lastAcceptedHeight: prometheus.NewGauge(prometheus.GaugeOpts{ 81 Name: "last_accepted_height", 82 Help: "last height accepted", 83 }), 84 lastAcceptedTimestamp: prometheus.NewGauge(prometheus.GaugeOpts{ 85 Name: "last_accepted_timestamp", 86 Help: "timestamp of the last accepted block in unix seconds", 87 }), 88 89 processingBlocks: linked.NewHashmap[ids.ID, processingStart](), 90 91 numProcessing: prometheus.NewGauge(prometheus.GaugeOpts{ 92 Name: "blks_processing", 93 Help: "number of currently processing blocks", 94 }), 95 96 blockSizeAcceptedSum: prometheus.NewGauge(prometheus.GaugeOpts{ 97 Name: "blks_accepted_container_size_sum", 98 Help: "cumulative size of all accepted blocks", 99 }), 100 pollsAccepted: metric.NewAveragerWithErrs( 101 "blks_polls_accepted", 102 "number of polls from the issuance of a block to its acceptance", 103 reg, 104 &errs, 105 ), 106 latAccepted: metric.NewAveragerWithErrs( 107 "blks_accepted", 108 "time (in ns) from the issuance of a block to its acceptance", 109 reg, 110 &errs, 111 ), 112 buildLatencyAccepted: prometheus.NewGauge(prometheus.GaugeOpts{ 113 Name: "blks_build_accept_latency", 114 Help: "time (in ns) from the timestamp of a block to the time it was accepted", 115 }), 116 117 blockSizeRejectedSum: prometheus.NewGauge(prometheus.GaugeOpts{ 118 Name: "blks_rejected_container_size_sum", 119 Help: "cumulative size of all rejected blocks", 120 }), 121 pollsRejected: metric.NewAveragerWithErrs( 122 "blks_polls_rejected", 123 "number of polls from the issuance of a block to its rejection", 124 reg, 125 &errs, 126 ), 127 latRejected: metric.NewAveragerWithErrs( 128 "blks_rejected", 129 "time (in ns) from the issuance of a block to its rejection", 130 reg, 131 &errs, 132 ), 133 134 numSuccessfulPolls: prometheus.NewCounter(prometheus.CounterOpts{ 135 Name: "polls_successful", 136 Help: "number of successful polls", 137 }), 138 numFailedPolls: prometheus.NewCounter(prometheus.CounterOpts{ 139 Name: "polls_failed", 140 Help: "number of failed polls", 141 }), 142 } 143 144 // Initially set the metrics for the last accepted block. 145 m.maxVerifiedHeight.Set(float64(lastAcceptedHeight)) 146 m.lastAcceptedHeight.Set(float64(lastAcceptedHeight)) 147 m.lastAcceptedTimestamp.Set(float64(lastAcceptedTime.Unix())) 148 149 errs.Add( 150 reg.Register(m.maxVerifiedHeight), 151 reg.Register(m.lastAcceptedHeight), 152 reg.Register(m.lastAcceptedTimestamp), 153 reg.Register(m.numProcessing), 154 reg.Register(m.blockSizeAcceptedSum), 155 reg.Register(m.buildLatencyAccepted), 156 reg.Register(m.blockSizeRejectedSum), 157 reg.Register(m.numSuccessfulPolls), 158 reg.Register(m.numFailedPolls), 159 ) 160 return m, errs.Err 161 } 162 163 func (m *metrics) Issued(blkID ids.ID, pollNumber uint64) { 164 m.processingBlocks.Put(blkID, processingStart{ 165 time: time.Now(), 166 pollNumber: pollNumber, 167 }) 168 m.numProcessing.Inc() 169 } 170 171 func (m *metrics) Verified(height uint64) { 172 m.currentMaxVerifiedHeight = max(m.currentMaxVerifiedHeight, height) 173 m.maxVerifiedHeight.Set(float64(m.currentMaxVerifiedHeight)) 174 } 175 176 func (m *metrics) Accepted( 177 blkID ids.ID, 178 height uint64, 179 timestamp time.Time, 180 pollNumber uint64, 181 blockSize int, 182 ) { 183 start, ok := m.processingBlocks.Get(blkID) 184 if !ok { 185 m.log.Error("unable to measure latency", 186 zap.Stringer("blkID", blkID), 187 zap.Stringer("status", choices.Accepted), 188 ) 189 return 190 } 191 m.lastAcceptedHeight.Set(float64(height)) 192 m.lastAcceptedTimestamp.Set(float64(timestamp.Unix())) 193 m.processingBlocks.Delete(blkID) 194 m.numProcessing.Dec() 195 196 m.blockSizeAcceptedSum.Add(float64(blockSize)) 197 198 m.pollsAccepted.Observe(float64(pollNumber - start.pollNumber)) 199 200 now := time.Now() 201 processingDuration := now.Sub(start.time) 202 m.latAccepted.Observe(float64(processingDuration)) 203 204 builtDuration := now.Sub(timestamp) 205 m.buildLatencyAccepted.Add(float64(builtDuration)) 206 } 207 208 func (m *metrics) Rejected(blkID ids.ID, pollNumber uint64, blockSize int) { 209 start, ok := m.processingBlocks.Get(blkID) 210 if !ok { 211 m.log.Error("unable to measure latency", 212 zap.Stringer("blkID", blkID), 213 zap.Stringer("status", choices.Rejected), 214 ) 215 return 216 } 217 m.processingBlocks.Delete(blkID) 218 m.numProcessing.Dec() 219 220 m.blockSizeRejectedSum.Add(float64(blockSize)) 221 222 m.pollsRejected.Observe(float64(pollNumber - start.pollNumber)) 223 224 duration := time.Since(start.time) 225 m.latRejected.Observe(float64(duration)) 226 } 227 228 func (m *metrics) MeasureAndGetOldestDuration() time.Duration { 229 _, oldestOp, exists := m.processingBlocks.Oldest() 230 if !exists { 231 return 0 232 } 233 return time.Since(oldestOp.time) 234 } 235 236 func (m *metrics) SuccessfulPoll() { 237 m.numSuccessfulPolls.Inc() 238 } 239 240 func (m *metrics) FailedPoll() { 241 m.numFailedPolls.Inc() 242 }