github.com/koko1123/flow-go-1@v0.29.6/consensus/hotstuff/notifications/telemetry.go (about)

     1  package notifications
     2  
     3  import (
     4  	"github.com/google/uuid"
     5  	"github.com/rs/zerolog"
     6  
     7  	"github.com/koko1123/flow-go-1/utils/logging"
     8  
     9  	"github.com/koko1123/flow-go-1/consensus/hotstuff"
    10  	"github.com/koko1123/flow-go-1/consensus/hotstuff/model"
    11  	"github.com/koko1123/flow-go-1/model/flow"
    12  )
    13  
    14  // TelemetryConsumer implements the hotstuff.Consumer interface.
    15  // consumes outbound notifications produced by the The HotStuff state machine.
    16  // For this purpose, the TelemetryConsumer enriches the state machine's notifications:
    17  //   - The goal is to identify all events as belonging together that were emitted during
    18  //     a path through the state machine.
    19  //   - A path through the state machine begins when:
    20  //     -- a vote is received
    21  //     -- a block is received
    22  //     -- a new view is started
    23  //     -- a timeout is processed
    24  //   - Each path through the state machine is identified by a unique id.
    25  //
    26  // Generally, the TelemetryConsumer could export the collected data to a variety of backends.
    27  // For now, we export the data to a logger.
    28  //
    29  // Telemetry does NOT capture slashing notifications
    30  type TelemetryConsumer struct {
    31  	NoopConsumer
    32  	pathHandler *PathHandler
    33  }
    34  
    35  var _ hotstuff.Consumer = (*TelemetryConsumer)(nil)
    36  
    37  func NewTelemetryConsumer(log zerolog.Logger, chain flow.ChainID) *TelemetryConsumer {
    38  	return &TelemetryConsumer{
    39  		pathHandler: NewPathHandler(log, chain),
    40  	}
    41  }
    42  
    43  func (t *TelemetryConsumer) OnReceiveVote(currentView uint64, vote *model.Vote) {
    44  	// TODO: update
    45  	//       As of Consensus Voting V2, receiving a vote is not an event within the HotStuff state machine anymore.
    46  	//t.pathHandler.StartNextPath(currentView)
    47  	// t.pathHandler.NextStep().
    48  	// 	Uint64("voted_block_view", vote.View).
    49  	// 	Hex("voted_block_id", vote.BlockID[:]).
    50  	// 	Hex("voter_id", vote.SignerID[:]).
    51  	// 	Msg("OnReceiveVote")
    52  }
    53  
    54  func (t *TelemetryConsumer) OnReceiveProposal(currentView uint64, proposal *model.Proposal) {
    55  	block := proposal.Block
    56  	t.pathHandler.StartNextPath(currentView)
    57  	step := t.pathHandler.NextStep()
    58  	step.
    59  		Uint64("block_view", block.View).
    60  		Hex("block_id", logging.ID(block.BlockID)).
    61  		Hex("block_proposer_id", logging.ID(block.ProposerID)).
    62  		Time("block_time", block.Timestamp)
    63  	if block.QC != nil {
    64  		step.
    65  			Uint64("qc_block_view", block.QC.View).
    66  			Hex("qc_block_id", logging.ID(block.QC.BlockID))
    67  	}
    68  	step.Msg("OnReceiveProposal")
    69  }
    70  
    71  func (t *TelemetryConsumer) OnEventProcessed() {
    72  	if t.pathHandler.IsCurrentPathClosed() {
    73  		return
    74  	}
    75  	t.pathHandler.NextStep().Msg("PathCompleted")
    76  	t.pathHandler.NextStep().Msg("OnEventProcessed")
    77  	t.pathHandler.CloseCurrentPath()
    78  }
    79  
    80  func (t *TelemetryConsumer) OnStartingTimeout(info *model.TimerInfo) {
    81  	if info.Mode == model.ReplicaTimeout {
    82  		// the PaceMarker starts a new ReplicaTimeout if and only if it transitions to a higher view
    83  		t.pathHandler.StartNextPath(info.View)
    84  	}
    85  	t.pathHandler.NextStep().
    86  		Str("timeout_mode", info.Mode.String()).
    87  		Float64("timeout_duration_seconds", info.Duration.Seconds()).
    88  		Time("timeout_cutoff", info.StartTime.Add(info.Duration)).
    89  		Msg("OnStartingTimeout")
    90  }
    91  
    92  func (t *TelemetryConsumer) OnEnteringView(viewNumber uint64, leader flow.Identifier) {
    93  	t.pathHandler.NextStep().
    94  		Uint64("entered_view", viewNumber).
    95  		Hex("leader", leader[:]).
    96  		Msg("OnEnteringView")
    97  }
    98  
    99  func (t *TelemetryConsumer) OnReachedTimeout(info *model.TimerInfo) {
   100  	t.pathHandler.StartNextPath(info.View)
   101  	t.pathHandler.NextStep().
   102  		Str("timeout_mode", info.Mode.String()).
   103  		Time("timeout_start_time", info.StartTime).
   104  		Float64("timeout_duration_seconds", info.Duration.Seconds()).
   105  		Msg("OnReachedTimeout")
   106  }
   107  
   108  func (t *TelemetryConsumer) OnBlockIncorporated(block *model.Block) {
   109  	t.pathHandler.NextStep().
   110  		Hex("block_id", logging.ID(block.BlockID)).
   111  		Msg("OnBlockIncorporated")
   112  }
   113  
   114  func (t *TelemetryConsumer) OnFinalizedBlock(block *model.Block) {
   115  	t.pathHandler.NextStep().
   116  		Hex("block_id", logging.ID(block.BlockID)).
   117  		Msg("OnFinalizedBlock")
   118  }
   119  
   120  func (t *TelemetryConsumer) OnQcTriggeredViewChange(qc *flow.QuorumCertificate, newView uint64) {
   121  	t.pathHandler.NextStep().
   122  		Uint64("qc_block_view", qc.View).
   123  		Uint64("next_view", newView).
   124  		Hex("qc_block_id", qc.BlockID[:]).
   125  		Msg("OnQcTriggeredViewChange")
   126  }
   127  
   128  func (t *TelemetryConsumer) OnProposingBlock(proposal *model.Proposal) {
   129  	block := proposal.Block
   130  	step := t.pathHandler.NextStep()
   131  	step.
   132  		Uint64("block_view", block.View).
   133  		Hex("block_id", logging.ID(block.BlockID)).
   134  		Hex("block_proposer_id", logging.ID(block.ProposerID)).
   135  		Time("block_time", block.Timestamp)
   136  	if block.QC != nil {
   137  		step.
   138  			Uint64("qc_block_view", block.QC.View).
   139  			Hex("qc_block_id", logging.ID(block.QC.BlockID))
   140  	}
   141  	step.Msg("OnProposingBlock")
   142  }
   143  
   144  func (t *TelemetryConsumer) OnVoting(vote *model.Vote) {
   145  	t.pathHandler.NextStep().
   146  		Uint64("voted_block_view", vote.View).
   147  		Hex("voted_block_id", vote.BlockID[:]).
   148  		Hex("voter_id", vote.SignerID[:]).
   149  		Msg("OnVoting")
   150  }
   151  
   152  func (t *TelemetryConsumer) OnForkChoiceGenerated(current_view uint64, qc *flow.QuorumCertificate) {
   153  	t.pathHandler.NextStep().
   154  		Uint64("block_view", current_view).
   155  		Msg("OnForkChoiceGenerated")
   156  	// Telemetry does not capture the details of the qc as the qc will be included in the
   157  	// proposed block, whose details (including the qc) are captured by telemetry
   158  }
   159  
   160  func (t *TelemetryConsumer) OnQcConstructedFromVotes(curView uint64, qc *flow.QuorumCertificate) {
   161  	t.pathHandler.StartNextPath(curView)
   162  	t.pathHandler.NextStep().
   163  		Uint64("curView", curView).
   164  		Uint64("qc_block_view", qc.View).
   165  		Hex("qc_block_id", qc.BlockID[:]).
   166  		Msg("OnQcConstructedFromVotes")
   167  }
   168  
   169  func (t *TelemetryConsumer) OnQcIncorporated(qc *flow.QuorumCertificate) {
   170  	t.pathHandler.NextStep().
   171  		Uint64("qc_block_view", qc.View).
   172  		Hex("qc_block_id", qc.BlockID[:]).
   173  		Msg("OnQcIncorporated")
   174  }
   175  
   176  // PathHandler maintains a notion of the current path through the state machine.
   177  // It allows to close a path and open new path. Each path is identified by a unique
   178  // (randomly generated) uuid. Along each path, we can capture information about relevant
   179  // Steps (each step is represented by a Zerolog Event).
   180  // In case there is no currently open path, the PathHandler still returns a Step,
   181  // but such steps are logged as telemetry errors.
   182  type PathHandler struct {
   183  	chain flow.ChainID
   184  	log   zerolog.Logger
   185  
   186  	// currentPath holds a Zerolog Context with the information about the current path.
   187  	// We represent the case where the current path has been closed by nil value.
   188  	currentPath *zerolog.Context
   189  }
   190  
   191  // NewPathHandler instantiate a new PathHandler.
   192  // The PathHandler has no currently open path
   193  func NewPathHandler(log zerolog.Logger, chain flow.ChainID) *PathHandler {
   194  	return &PathHandler{
   195  		chain:       chain,
   196  		log:         log.With().Str("hotstuff", "telemetry").Str("chain", chain.String()).Logger(),
   197  		currentPath: nil,
   198  	}
   199  }
   200  
   201  // StartNextPath starts a new Path. Implicitly closes previous path if still open.
   202  // Returns self-reference for chaining
   203  func (p *PathHandler) StartNextPath(view uint64) *PathHandler {
   204  	if p.currentPath != nil {
   205  		p.NextStep().Msg("PathCompleted")
   206  	}
   207  	c := p.log.With().
   208  		Str("path_id", uuid.New().String()).
   209  		Uint64("view", view)
   210  	p.currentPath = &c
   211  	return p
   212  }
   213  
   214  // CloseCurrentPath closes the current path. Repeated calls to CloseCurrentPath are handled.
   215  // All Details hereafter, until a new Path is started, are logged as telemetry Errors.
   216  // Returns self-reference for chaining
   217  func (p *PathHandler) CloseCurrentPath() *PathHandler {
   218  	p.currentPath = nil
   219  	return p
   220  }
   221  
   222  // IsCurrentPathOpen if and only is the most recently started path has been closed.
   223  func (p *PathHandler) IsCurrentPathClosed() bool {
   224  	return p.currentPath == nil
   225  }
   226  
   227  // NextStep returns a Zerolog event for the currently open path. If the current path
   228  // is closed, the event will be logged as telemetry error.
   229  func (p *PathHandler) NextStep() *zerolog.Event {
   230  	if p.currentPath == nil {
   231  		l := p.log.With().Str("error", "no path").Logger()
   232  		return l.Error()
   233  	}
   234  	l := p.currentPath.Logger()
   235  	return l.Info()
   236  }