github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/consensus/hotstuff/notifications/telemetry.go (about) 1 package notifications 2 3 import ( 4 "time" 5 6 "github.com/google/uuid" 7 "github.com/rs/zerolog" 8 9 "github.com/onflow/flow-go/consensus/hotstuff" 10 "github.com/onflow/flow-go/consensus/hotstuff/model" 11 "github.com/onflow/flow-go/model/flow" 12 "github.com/onflow/flow-go/utils/logging" 13 ) 14 15 // TelemetryConsumer implements the hotstuff.Consumer interface. 16 // consumes outbound notifications produced by the HotStuff state machine. 17 // For this purpose, the TelemetryConsumer enriches the state machine's notifications: 18 // - The goal is to identify all events as belonging together that were emitted during 19 // a path through the state machine. 20 // - A path through the state machine begins when: 21 // -- a block has been received 22 // -- a QC has been constructed 23 // -- a TC has been constructed 24 // -- a partial TC has been constructed 25 // -- a local timeout has been initiated 26 // - Each path through the state machine is identified by a unique id. 27 // 28 // Additionally, the TelemetryConsumer reports events related to vote and timeout aggregation 29 // but those events are not bound to a path, so they are reported differently. 30 // Generally, the TelemetryConsumer could export the collected data to a variety of backends. 31 // For now, we export the data to a logger. 32 // 33 // Telemetry does NOT capture slashing notifications 34 type TelemetryConsumer struct { 35 NoopTimeoutCollectorConsumer 36 NoopVoteCollectorConsumer 37 pathHandler *PathHandler 38 noPathLogger zerolog.Logger 39 } 40 41 // Telemetry implements consumers for _all happy-path_ interfaces in consensus/hotstuff/notifications/telemetry.go: 42 var _ hotstuff.ParticipantConsumer = (*TelemetryConsumer)(nil) 43 var _ hotstuff.CommunicatorConsumer = (*TelemetryConsumer)(nil) 44 var _ hotstuff.FinalizationConsumer = (*TelemetryConsumer)(nil) 45 var _ hotstuff.VoteCollectorConsumer = (*TelemetryConsumer)(nil) 46 var _ hotstuff.TimeoutCollectorConsumer = (*TelemetryConsumer)(nil) 47 48 // NewTelemetryConsumer creates consumer that reports telemetry events using logger backend. 49 // Logger MUST include `chain` parameter as part of log context with corresponding chain ID to correctly map telemetry events to chain. 50 func NewTelemetryConsumer(log zerolog.Logger) *TelemetryConsumer { 51 pathHandler := NewPathHandler(log) 52 return &TelemetryConsumer{ 53 pathHandler: pathHandler, 54 noPathLogger: pathHandler.log, 55 } 56 } 57 58 func (t *TelemetryConsumer) OnStart(currentView uint64) { 59 t.pathHandler.StartNextPath(currentView) 60 t.pathHandler.NextStep().Msg("OnStart") 61 } 62 63 func (t *TelemetryConsumer) OnReceiveProposal(currentView uint64, proposal *model.Proposal) { 64 block := proposal.Block 65 t.pathHandler.StartNextPath(currentView) 66 step := t.pathHandler.NextStep(). 67 Uint64("block_view", block.View). 68 Hex("block_id", logging.ID(block.BlockID)). 69 Hex("block_proposer_id", logging.ID(block.ProposerID)). 70 Time("block_time", block.Timestamp). 71 Uint64("qc_view", block.QC.View). 72 Hex("qc_block_id", logging.ID(block.QC.BlockID)) 73 74 lastViewTC := proposal.LastViewTC 75 if lastViewTC != nil { 76 step. 77 Uint64("last_view_tc_view", lastViewTC.View). 78 Uint64("last_view_tc_newest_qc_view", lastViewTC.NewestQC.View). 79 Hex("last_view_tc_newest_qc_block_id", logging.ID(lastViewTC.NewestQC.BlockID)) 80 } 81 82 step.Msg("OnReceiveProposal") 83 } 84 85 func (t *TelemetryConsumer) OnReceiveQc(currentView uint64, qc *flow.QuorumCertificate) { 86 t.pathHandler.StartNextPath(currentView) 87 t.pathHandler.NextStep(). 88 Uint64("qc_view", qc.View). 89 Hex("qc_block_id", logging.ID(qc.BlockID)). 90 Msg("OnReceiveQc") 91 } 92 93 func (t *TelemetryConsumer) OnReceiveTc(currentView uint64, tc *flow.TimeoutCertificate) { 94 t.pathHandler.StartNextPath(currentView) 95 t.pathHandler.NextStep(). 96 Uint64("view", tc.View). 97 Uint64("newest_qc_view", tc.NewestQC.View). 98 Hex("newest_qc_block_id", logging.ID(tc.NewestQC.BlockID)). 99 Msg("OnReceiveTc") 100 } 101 102 func (t *TelemetryConsumer) OnPartialTc(currentView uint64, partialTc *hotstuff.PartialTcCreated) { 103 t.pathHandler.StartNextPath(currentView) 104 step := t.pathHandler.NextStep(). 105 Uint64("view", partialTc.View). 106 Uint64("newest_qc_view", partialTc.NewestQC.View). 107 Hex("newest_qc_block_id", logging.ID(partialTc.NewestQC.BlockID)) 108 109 lastViewTC := partialTc.LastViewTC 110 if lastViewTC != nil { 111 step. 112 Uint64("last_view_tc_view", lastViewTC.View). 113 Uint64("last_view_tc_newest_qc_view", lastViewTC.NewestQC.View). 114 Hex("last_view_tc_newest_qc_block_id", logging.ID(lastViewTC.NewestQC.BlockID)) 115 } 116 117 step.Msg("OnPartialTc") 118 } 119 120 func (t *TelemetryConsumer) OnLocalTimeout(currentView uint64) { 121 t.pathHandler.StartNextPath(currentView) 122 t.pathHandler.NextStep().Msg("OnLocalTimeout") 123 } 124 125 func (t *TelemetryConsumer) OnEventProcessed() { 126 if t.pathHandler.IsCurrentPathClosed() { 127 return 128 } 129 t.pathHandler.NextStep().Msg("PathCompleted") 130 t.pathHandler.NextStep().Msg("OnEventProcessed") 131 t.pathHandler.CloseCurrentPath() 132 } 133 134 func (t *TelemetryConsumer) OnStartingTimeout(info model.TimerInfo) { 135 t.pathHandler.NextStep(). 136 Float64("timeout_duration_seconds", info.Duration.Seconds()). 137 Time("timeout_cutoff", info.StartTime.Add(info.Duration)). 138 Msg("OnStartingTimeout") 139 } 140 141 func (t *TelemetryConsumer) OnBlockIncorporated(block *model.Block) { 142 t.pathHandler.NextStep(). 143 Hex("block_id", logging.ID(block.BlockID)). 144 Msg("OnBlockIncorporated") 145 } 146 147 func (t *TelemetryConsumer) OnFinalizedBlock(block *model.Block) { 148 t.pathHandler.NextStep(). 149 Hex("block_id", logging.ID(block.BlockID)). 150 Msg("OnFinalizedBlock") 151 } 152 153 func (t *TelemetryConsumer) OnQcTriggeredViewChange(oldView uint64, newView uint64, qc *flow.QuorumCertificate) { 154 t.pathHandler.NextStep(). 155 Uint64("qc_view", qc.View). 156 Uint64("old_view", oldView). 157 Uint64("next_view", newView). 158 Hex("qc_block_id", qc.BlockID[:]). 159 Msg("OnQcTriggeredViewChange") 160 } 161 162 func (t *TelemetryConsumer) OnTcTriggeredViewChange(oldView uint64, newView uint64, tc *flow.TimeoutCertificate) { 163 t.pathHandler.NextStep(). 164 Uint64("tc_view", tc.View). 165 Uint64("old_view", oldView). 166 Uint64("next_view", newView). 167 Uint64("tc_newest_qc_view", tc.NewestQC.View). 168 Hex("tc_newest_qc_block_id", tc.NewestQC.BlockID[:]). 169 Msg("OnTcTriggeredViewChange") 170 } 171 172 func (t *TelemetryConsumer) OnOwnVote(blockID flow.Identifier, view uint64, _ []byte, recipientID flow.Identifier) { 173 t.pathHandler.NextStep(). 174 Uint64("voted_block_view", view). 175 Hex("voted_block_id", logging.ID(blockID)). 176 Hex("recipient_id", logging.ID(recipientID)). 177 Msg("OnOwnVote") 178 } 179 180 func (t *TelemetryConsumer) OnOwnProposal(proposal *flow.Header, targetPublicationTime time.Time) { 181 step := t.pathHandler.NextStep(). 182 Uint64("block_view", proposal.View). 183 Hex("block_id", logging.ID(proposal.ID())). 184 Hex("block_proposer_id", logging.ID(proposal.ProposerID)). 185 Time("block_time", proposal.Timestamp). 186 Uint64("qc_view", proposal.ParentView). 187 Hex("qc_block_id", logging.ID(proposal.ParentID)). 188 Time("targetPublicationTime", targetPublicationTime) 189 lastViewTC := proposal.LastViewTC 190 if lastViewTC != nil { 191 step. 192 Uint64("last_view_tc_view", lastViewTC.View). 193 Uint64("last_view_tc_newest_qc_view", lastViewTC.NewestQC.View). 194 Hex("last_view_tc_newest_qc_block_id", logging.ID(lastViewTC.NewestQC.BlockID)) 195 } 196 step.Msg("OnOwnProposal") 197 } 198 199 func (t *TelemetryConsumer) OnOwnTimeout(timeout *model.TimeoutObject) { 200 step := t.pathHandler.NextStep(). 201 Uint64("view", timeout.View). 202 Uint64("timeout_tick", timeout.TimeoutTick). 203 Uint64("newest_qc_view", timeout.NewestQC.View). 204 Hex("newest_qc_block_id", logging.ID(timeout.NewestQC.BlockID)) 205 206 lastViewTC := timeout.LastViewTC 207 if lastViewTC != nil { 208 step. 209 Uint64("last_view_tc_view", lastViewTC.View). 210 Uint64("last_view_tc_newest_qc_view", lastViewTC.NewestQC.View). 211 Hex("last_view_tc_newest_qc_block_id", logging.ID(lastViewTC.NewestQC.BlockID)) 212 } 213 step.Msg("OnOwnTimeout") 214 } 215 216 func (t *TelemetryConsumer) OnVoteProcessed(vote *model.Vote) { 217 t.noPathLogger.Info(). 218 Uint64("voted_block_view", vote.View). 219 Hex("voted_block_id", logging.ID(vote.BlockID)). 220 Hex("signer_id", logging.ID(vote.SignerID)). 221 Msg("OnVoteProcessed") 222 } 223 224 func (t *TelemetryConsumer) OnTimeoutProcessed(timeout *model.TimeoutObject) { 225 step := t.noPathLogger.Info(). 226 Uint64("view", timeout.View). 227 Uint64("timeout_tick", timeout.TimeoutTick). 228 Uint64("newest_qc_view", timeout.NewestQC.View). 229 Hex("newest_qc_block_id", logging.ID(timeout.NewestQC.BlockID)). 230 Hex("signer_id", logging.ID(timeout.SignerID)) 231 lastViewTC := timeout.LastViewTC 232 if lastViewTC != nil { 233 step. 234 Uint64("last_view_tc_view", lastViewTC.View). 235 Uint64("last_view_tc_newest_qc_view", lastViewTC.NewestQC.View). 236 Hex("last_view_tc_newest_qc_block_id", logging.ID(lastViewTC.NewestQC.BlockID)) 237 } 238 step.Msg("OnTimeoutProcessed") 239 } 240 241 func (t *TelemetryConsumer) OnCurrentViewDetails(currentView, finalizedView uint64, currentLeader flow.Identifier) { 242 t.pathHandler.NextStep(). 243 Uint64("view", currentView). 244 Uint64("finalized_view", finalizedView). 245 Hex("current_leader", currentLeader[:]). 246 Msg("OnCurrentViewDetails") 247 } 248 249 func (t *TelemetryConsumer) OnViewChange(oldView, newView uint64) { 250 t.pathHandler.NextStep(). 251 Uint64("old_view", oldView). 252 Uint64("new_view", newView). 253 Msg("OnViewChange") 254 } 255 256 // PathHandler maintains a notion of the current path through the state machine. 257 // It allows to close a path and open new path. Each path is identified by a unique 258 // (randomly generated) uuid. Along each path, we can capture information about relevant 259 // Steps (each step is represented by a Zerolog Event). 260 // In case there is no currently open path, the PathHandler still returns a Step, 261 // but such steps are logged as telemetry errors. 262 type PathHandler struct { 263 log zerolog.Logger 264 265 // currentPath holds a Zerolog Context with the information about the current path. 266 // We represent the case where the current path has been closed by nil value. 267 currentPath *zerolog.Context 268 } 269 270 // NewPathHandler instantiate a new PathHandler. 271 // The PathHandler has no currently open path 272 // Logger MUST include `chain` parameter as part of log context with corresponding chain ID to correctly map telemetry events to chain. 273 func NewPathHandler(log zerolog.Logger) *PathHandler { 274 return &PathHandler{ 275 log: log.With().Str("component", "hotstuff.telemetry").Logger(), 276 currentPath: nil, 277 } 278 } 279 280 // StartNextPath starts a new Path. Implicitly closes previous path if still open. 281 // Returns self-reference for chaining 282 func (p *PathHandler) StartNextPath(view uint64) *PathHandler { 283 if p.currentPath != nil { 284 p.NextStep().Msg("PathCompleted") 285 } 286 c := p.log.With(). 287 Str("path_id", uuid.New().String()). 288 Uint64("view", view) 289 p.currentPath = &c 290 return p 291 } 292 293 // CloseCurrentPath closes the current path. Repeated calls to CloseCurrentPath are handled. 294 // All Details hereafter, until a new Path is started, are logged as telemetry Errors. 295 // Returns self-reference for chaining 296 func (p *PathHandler) CloseCurrentPath() *PathHandler { 297 p.currentPath = nil 298 return p 299 } 300 301 // IsCurrentPathClosed if and only if the most recently started path has been closed. 302 func (p *PathHandler) IsCurrentPathClosed() bool { 303 return p.currentPath == nil 304 } 305 306 // NextStep returns a Zerolog event for the currently open path. If the current path 307 // is closed, the event will be logged as telemetry error. 308 func (p *PathHandler) NextStep() *zerolog.Event { 309 if p.currentPath == nil { 310 l := p.log.With().Str("error", "no path").Logger() 311 return l.Error() 312 } 313 l := p.currentPath.Logger() 314 return l.Info() 315 }